Google Interview Question
Software EngineersCountry: United States
Hi Scott.
Can you please explain me the question. I am not able to comprehend it correctly.
My confusion is isn't "cat" and "CATapult" a continuous match ?
Scott, could you please explain why are you adding the top and top-left cells in case of equals?
Let me try to explain. f[i][j] means how many times target[0,i-1] appears in source[0,j-1]. So, if target[i] == source[j], f[i][j] = f[i-1][j-1](how many times target[0,i-2] appears in source[0,j-2]) + f[i-1][j] (how many times target[0,i-2] appears in source[0,j-1]).
if target[i] != source[j], f[i][j] = f[i][j-1](how many times target[0,i-1] appears in source[0,j-2]).
Another recursive option called with both strings and 0 (for s1Idx and s2Idx).
int countMatches(String str1, String str2, int s1Idx, int s2Idx) {
if(s1Idx >= str1.length()) return 0;
int count = 0;
for(int index=s2Idx; index<str2.length(); index++) {
if(str1.charAt(s1Idx) == str2.charAt(index)) {
if(str1.length()-1 == s1Idx) {
count++;
else
count += findMatches(str1, str2, s1Idx+1, index+1);
}
}
return count;
}
One solution is recursion where each instance is to find all discontinuous matches of a string s1 in a string s2 for a certain substring of s1 and substring of s2.
Ex. Looking for "cat" in "catapault" involves looking for "at" in "atapault", "t" in "tapault", "t" in "pault", and "t" in "ult".
Base case is where the length of substring of s1 becomes 0 or the length of substring of s2 becomes 0
Pseudocode where s1 is smaller string, s2 is larger string, i is index in smaller string to start from, j is index in larger string to start looking from (i and j are just to reduce making new copies of strings)
fn(String s1, String s2, int i, int j) {
if (i > s1.length - 1 || j > s2.length - 1) {
return 0;
}
int total = 0;
char firstChar = s1.get(i);
for (int k = 0; k < s2.length; k++) {
if (s2.get[k] == firstChar) {
total += fn(s1, s2, i+1, k);
}
}
return total;
}
Runtime: O(n^2)
I did the same thing, except i compare the last characters instead of the first. Good solution.
Recursive solution::
Apologize for method and variable names
public class DiscontMatches {
public static void main(String[] args) {
Set<String> ans = findDiscMatches("cat", "catapult");
for(String str : ans)
System.out.println(str);
}
public static Set<String> findDiscMatches(String str1, String str2){
Set<String> set = new TreeSet<>();
Map<Character, List<Integer>> map = new LinkedHashMap<>();
char [] arr = str2.toCharArray();
char [] arr2 = str1.toCharArray();
for(char c: arr2){
int i = 0;
for(char c2 : arr){
if(c==c2){
if(!map.containsKey(c))
map.put(c, new ArrayList<Integer>());
map.get(c).add(i);
}
i++;
}
}
createSet(arr2, arr, set, map, 0, -1);
return set;
}
private static void createSet(char [] str1, char [] str2, Set<String> set,
Map<Character, List<Integer>> map, int str1Pos, int prevCharPos) {
if(str1Pos == str1.length-1) {
List<Integer> locations = map.get(str1[str1Pos]);
for(int i : locations){
if(i<=prevCharPos)
continue;
char [] clone = str2.clone();
clone[i]-=32;
set.add(new String(clone));
}
} else if(str1Pos < str1.length-1) {
List<Integer> locations = map.get(str1[str1Pos]);
for(int i : locations){
if(i<=prevCharPos)
continue;
char [] clone = str2.clone();
clone[i]-=32;
createSet(str1, clone, set, map, str1Pos+1, i);
}
}
}
}
My solution is similar except I computed only the "count" as the question asks only the
number of discontinuous matches.
Logic behind the code:
If "cat" is present in "catapult" then "at is present in "atapult" and "t" is present in "tapult". So by recursively adding the number of occurrences of each character in the substring we can get the number of discontinuous matches.
public static int discontinousMatch(String s1, String s2)
{
int total = 0;
if(s1.length() == 1)
{
int count =0;
while(s2.contains(s1))
{
count++;
s2 = s2.substring(s2.indexOf(s1.charAt(0))+1);
}
return count;
}
char c = s1.charAt(0);
int index = s2.indexOf(c);
while (index < s2.length() && index > -1)
{
total += discontinousMatch(s1.substring(1), s2.substring(index+1));
index = s2.indexOf(c, index+1);
}
return total;
}
DP Python code (only # of matches)
def DisconMatch(strA,strB):
if not strA:
return 0
if not strB:
return 0
if len(strA)>len(strB):
return 0
m=len(strA)
n=len(strB)
DPmatrix=[[(0,-1) for x in range(n)] for y in range(m)] # DP will remember (number, index) for matches found
for i in range(m):
for j in range(n):
if strA[i]==strB[j]:
if j==0 and i==0:
DPmatrix[i][j]=(1,j)
elif i==0:
DPmatrix[i][j]=(DPmatrix[0][j-1][0]+1,j)
elif j<i:
DPmatrix[i][j]=(0,-1)
else:
DPmatrix[i][j]=(DPmatrix[i-1][j-1][0]+DPmatrix[i][j-1][0],j)
else:
if j==0:
DPmatrix[i][j]=(0,-1)
else:
DPmatrix[i][j]=(DPmatrix[i][j-1][0],-1)
print(DPmatrix)
return DPmatrix[m-1][n-1]
print(DisconMatch("cat","catapult"))
Outputs the results in the requested format:
short string:
cat
long string:
catapult
3 => CATapult, CAtapulT, CatApulT,
std::vector<std::string> results;
char shortStr[128], longStr[128];
void DisconMatch(char* pShort, char* pLong)
{
while ((pLong = strchr(pLong, *pShort)) != NULL)
{
*pLong = toupper(*pLong);
if (!pShort[1])
results.push_back(longStr);
else
DisconMatch(pShort+1, pLong+1);
*pLong++ = tolower(*pLong);
}
}
void main()
{
while (1)
{
printf("short string:\n");
gets(shortStr);
printf("long string:\n");
gets(longStr);
DisconMatch(shortStr, longStr);
printf("%d => ", results.size());
for (int i = 0; i < results.size(); i++)
printf("%s, ", results[i].c_str());
printf("\n");
results.clear();
}
}
public static int countDiscontinuousMatches(String input, String word) {
if (input == null || word == null || input.isEmpty() || word.isEmpty() || input.length() > word.length()) {
return 0;
}
int count = 0;
for (int i = 0; i < word.length(); i++) {
if (word.charAt(i) == input.charAt(0)) {
count++;
int n = countDiscontinuousMatches(input.substring(1), word.substring(i + 1));
if (n != 0) {
count *= n;
}
}
}
return count;
}
public static int countDiscontinuousMatches(String input, String word) {
if (input == null || word == null || input.isEmpty() || word.isEmpty() || input.length() > word.length()) {
return 0;
}
int count = 0;
for (int i = 0; i < word.length(); i++) {
if (word.charAt(i) == input.charAt(0)) {
count++;
int n = countDiscontinuousMatches(input.substring(1), word.substring(i + 1));
if (n != 0) {
count *= n;
}
}
}
return count;
}
Recursive Solution:
public int findDisc(String a, String b) {
return findDiscHelper(a, b);
}
public int findDiscHelper(String a, String b) {
int count = 0;
for (int i = b.length() - 1; i >= 0; i--) {
if (b.charAt(i) == a.charAt(a.length()- 1)) {
count += (a.length() <= 1) ? 1 : findDiscHelper(a.substring(0, a.length() - 1), b.substring(0 , i) );
}
}
return count;
}
}
public class DiscontinuedMatch {
public static void main (String[] args) {
System.out.println(discMatch("cat", "catapult"));
}
public static int discMatch(String a, String b) {
if(a == null || b == null || a.length() > b.length()) return 0;
return match(a, b, 0, 0);
}
public static int match(String a, String b, int ai, int bi) {
if(ai >= a.length()) return 1;
if(bi >= b.length()) return 0;
int count = 0;
for(int i=bi; i<b.length(); i++) {
if(a.charAt(ai) == b.charAt(i)) {
count+= match(a,b,ai+1,i+1);
}
}
return count;
}
}
Efficient solution in Java below (uses helper function that tracks starting indices in order to avoid creating new substrings when calling the substring method on a String).
public class DiscontinuousMatches {
public static int numMatches(String s1, String s2) {
return numMatchesHelper(s1, s2, 0, 0);
}
private static int numMatchesHelper(String s1, String s2, int start1, int start2) {
if (s1.length() - start1 > s2.length() - start2) return 0;
if (s1.length() - start1 == 0) return 0;
int nMatches = 0;
if (s1.charAt(start1) == s2.charAt(start2)) {
if (start1 == s1.length() - 1) {
nMatches += 1;
} else {
nMatches += numMatchesHelper(s1, s2, start1 + 1, start2 + 1);
}
}
nMatches += numMatchesHelper(s1, s2, start1, start2 + 1);
return nMatches;
}
public static void main(String[] args) {
System.out.println(numMatches(args[0], args[1]));
}
}
import java.util.Arrays;
public class DiscontiniousMatches {
public static int matches(String s, String t){
int m = s.length();
int n = t.length();
int p = Math.min(m,n);
int [][] res = new int[m+1][n+1];
for(int i = 0;i<m;i++){
char c = s.charAt(i);
int x = 0;
for(int j = 0;j<n;j++){
char d = t.charAt(j);
if(c==d){
if(i == 0)
res[i+1][j+1] = ++x;
else
res[i+1][j+1] = res[i][j+1] + res[i+1][j];
}
else{
res[i+1][j+1] = res[i+1][j];
}
}
System.out.println(Arrays.toString(res[i]));
}
System.out.println(Arrays.toString(res[m]));
return res[m][n];
}
public static void main(String arg[]){
System.out.println(matches("cata","caataat"));
}
}
Succinct C++ recursive version
#include <iostream>
int count = 0;
void findSubstring(char* target, char* space)
{
if(strlen(target) == 0) //all characters are found
count ++;
for (int i = 0; i < strlen (space) ; i++)
if (target[0] == space [i])
findSubstring(target + 1, space + i);
}
int main()
{
findSubstring("cat", "catapult");
std::cout<<count;
return 0;
}
This question can be solved with recursion. Lets take the given example, for every character in the substring "CAT" :
'T' is at index (2->1), (7->1) , return a HashMap<index, count>
'A' has to come before T so A's acceptable indexes should be smaller than T's, and index 1 can be paired two times with index 2 and 7 so at this level return (1->2),(3->1)
Similarly, for 'C', index 0 can be paired with both 1,3 so adding the values of key 1 and key 3, return (0->3)
The returning value of the function would be a HashMap<Integer, Integer>, the value of key at index 0 will give the answer.
/*
1. scan from left to right, if a character matches, look for the next char to match
2. if the whole string found, find occurrences of the last matched char to the end
3. when end is reached, come back the previous location where entire string match found,
and start scanning for the previous char until end
4. continue the recursion
*/
public class FindDiscontinuousMatches {
char[] contentBuf = null;
char[] matchBuf = null;
public FindDiscontinuousMatches(String content, String toMatch) {
contentBuf = content.toCharArray();
matchBuf = toMatch.toCharArray();
}
public void countDiscontinuousMatch() {
System.out.println(findCharMatchFromString(contentBuf, matchBuf, 0, 0));
}
public int findCharMatchFromString(char[] contentBuf, char[] matchBuf, int matchIndex, int contentIndex) {
int count = 0;
if(contentIndex < contentBuf.length && matchIndex < matchBuf.length) {
if((matchIndex == matchBuf.length - 1) && (matchBuf[matchIndex] == contentBuf[contentIndex])) {
count++;
}
if(matchBuf[matchIndex] == contentBuf[contentIndex]) {
count += findCharMatchFromString(contentBuf, matchBuf, matchIndex + 1, contentIndex + 1);
}
count += findCharMatchFromString(contentBuf, matchBuf, matchIndex, contentIndex + 1);
}
return count;
}
public static void main(String [] args) {
new FindDiscontinuousMatches("CATAPULT", "CAT").countDiscontinuousMatch();
}
}
/*
1. scan from left to right, if a character matches, look for the next char to match
2. if the whole string found, find occurrences of the last matched char to the end
3. when end is reached, come back the previous location where entire string match found,
and start scanning for the previous char until end
4. continue the recursion
*/
public class FindDiscontinuousMatches {
char[] contentBuf = null;
char[] matchBuf = null;
public FindDiscontinuousMatches(String content, String toMatch) {
contentBuf = content.toCharArray();
matchBuf = toMatch.toCharArray();
}
public void countDiscontinuousMatch() {
System.out.println(findCharMatchFromString(contentBuf, matchBuf, 0, 0));
}
public int findCharMatchFromString(char[] contentBuf, char[] matchBuf, int matchIndex, int contentIndex) {
int count = 0;
if(contentIndex < contentBuf.length && matchIndex < matchBuf.length) {
if((matchIndex == matchBuf.length - 1) && (matchBuf[matchIndex] == contentBuf[contentIndex])) {
count++;
}
if(matchBuf[matchIndex] == contentBuf[contentIndex]) {
count += findCharMatchFromString(contentBuf, matchBuf, matchIndex + 1, contentIndex + 1);
}
count += findCharMatchFromString(contentBuf, matchBuf, matchIndex, contentIndex + 1);
}
return count;
}
public static void main(String [] args) {
new FindDiscontinuousMatches("CATAPULT", "CAT").countDiscontinuousMatch();
}
}
private int discontinuousMatch(String pattern, String str) {
return discontinuousMatch(pattern, str, 0, 0);
}
private int discontinuousMatch(String pattern, String str, int patternIndex, int strIndex) {
if (patternIndex == pattern.length()) {
return 1;
} else if (strIndex == str.length()) {
return 0;
} else {
int count = 0;
for (int i = strIndex; i < str.length(); ++i) {
if (pattern.charAt(patternIndex) == str.charAt(i)) {
count += discontinuousMatch(pattern, str, patternIndex + 1,i + 1);
}
}
return count;
}
}
This sounds immediately like a recursive problem, so let's think about the base cases. First off, if either string is empty, then there are no possible matches. If the two strings are equal, then there's only one match. If the strings have the same length but aren't equal, there's no match. If they're of different lengths, then we must be looking for all the matches of the shorter string in the longer string. So let's call the shorter string the search string and the longer string the target string.
What's the recursion? Well, if the first characters of both strings don't match, then we need to look for the full search string starting at the second character of the target string. In other words, given "act" and "react", we can recurse to ("act", "eact") and then to ("act", "act"). If the first characters do match, then we have two options: match the first character and recurse with both strings shortened by the matched character, or don't match the first character, hoping to find another match later. E.g, given ("act", "acting") we recurse to both ("ct", "cting") and ("act", "cting"). If we ever use up the last character in the search string, we add a match. Let's write up this recursive solution:
def match(search, target):
if not search or not target:
return 0
if search == target:
return 1
if len(search) == len(target):
return 0
if len(search) > len(target):
search, target = target, search
if search[0] == target[0]:
if len(search) == 1:
return 1 + match(search, target[1:])
else:
return match(search[1:], target[1:]) + match(search, target[1:])
else:
return match(search, target[1:])
This gets us the right answer but is unfortunately rather slow on long strings. Why? Because the recursive stack can get very deep - any time there's a character match, we're splitting into two recursive calls, giving us O(N^2) worst case behavior. However, we'll note that there are only M possible search strings and N possible target strings. Our recursive calls must be repeatedly evaluating the same subproblems. That means we can use a dynamic programming approach - we'll store the results of previous calls by allocating an array.
import numpy as np
def match(search, target):
if len(search) > len(target):
search, target = target, search
m, n = len(search), len(target)
#Some base cases
if not search or not target:
return 0
if search == target:
return 1
if m == n:
return 0
#Allocate the array
table = np.zeros((m, n), dtype=int)
#Fill in the bottom row (matching the last character of the search string)
i = m - 1
for j in range(n)[::-1]:
if j < n - 1:
table[i,j] = table[i,j+1]
if search[i] == target[j]:
table[i,j] += 1
#Fill in the rest of the table
for i in range(m - 1)[::-1]:
for j in range(n - 1)[::-1]:
if search[i] == target[j]:
table[i,j] = table[i+1,j+1] + table[i,j+1]
else:
table[i,j] = table[i,j+1]
#Return the final value
return table[0,0]
Now this runs quite fast --- it takes only O(MN) time with the nested loop. However, it also takes O(MN) space to store the table. But we don't actually need the whole table! At each point, we only need the next column over. So we can reduce this to O(M) space by shrinking the table:
import numpy as np
def match(search, target):
if len(search) > len(target):
search, target = target, search
m, n = len(search), len(target)
#Some base cases
if not search or not target:
return 0
if search == target:
return 1
if m == n:
return 0
#Allocate the array
table = np.zeros((m, 2), dtype=int)
#Fill in the bottom right element (match the last two characters)
if search[-1] == target[-1]:
table[-1, 0] = 1 #First column because it will be shifted over in the loop
#Iterate through all the columns
for j in range(n - 1)[::-1]:
#Copy the left column over to the right
table[:,1] = table[:,0]
#Bottom row
if search[m - 1] == target[j]:
table[m - 1, 0] += 1
#Remaining rows
for i in range(m - 1)[::-1]:
if search[i] == target[j]:
table[i,0] = table[i+1,1] + table[i,1]
else:
table[i,0] = table[i,1]
#Return the final value
return table[0,0]
This is equivalent to finding longest common subsequences. Specifically, any discontinuous occurrence of a pattern P in a search string S is by definition a longest common subsequence. We just want the number of matches rather than the length, and we want to return zero in case the length of the LCS is less than that of P (i.e., no full match exists).
We build an array c[i,j] which represents the number of matches of P[0, ..., i-1] in S[0, ..., j-1]. This naturally satisfies the following recurrences:
c[i,j] = c[i-1, j-1] + c[i, j-1] if P[i] = S[j] (number of matches using the current character, plus number of matches using only prior characters);
c[i,j] = c[i, j-1] if P[i] != S[j].
Here's a direct implementation of this in Python:
def discontinuous_matches(p, s):
c = [[0 for i in range(0, len(s) + 1)] for j in range(0, len(p) + 1)]
for i in range(0, len(s) + 1):
c[0][i] = 1
for i in range(1, len(p) + 1):
for j in range(1, len(s) + 1):
if p[i-1] == s[j-1]:
c[i][j] = c[i-1][j-1] + c[i][j-1]
else:
c[i][j] = c[i][j-1]
return c[len(p)][len(s)]
I personally find a recursive approach a lot more comprehensible, though. The idea is simpler. We look at each index of the search string and find the number of occurrences of the pattern starting at that index. We add up all these results.
The number of occurrences of the pattern p and index i of the search string s, if s[i] == p[0], is simply the number of occurrences of the *rest* of the pattern in the remainder of the string.
def discontinuous_matches_recursive(p, s):
if len(p) == 0:
return 1
count = 0
for i, c in enumerate(s):
if p[0] == c:
count += discontinuous_matches_recursive(p[1:], s[i+1:])
return count
Recursive and DP solution
int num_dc_matches_dp(const std::string& s, const std::string& t)
{
const auto sn = s.length();
const auto tn = t.length();
auto d = new int*[sn + 1];
for (auto i = 0; i < sn + 1; ++i) d[i] = new int[tn + 1];
for (auto i = 0; i < sn + 1; ++i) d[i][0] = 1;
for (auto j = 1; j < tn + 1; ++j) d[0][j] = 0;
for (auto i = 1; i < sn + 1; ++i)
{
for (auto j = 1; j < tn + 1; ++j)
{
if (s[i - 1] == t[j - 1])
{
d[i][j] = d[i - 1][j - 1] + d[i - 1][j];
}
else
{
d[i][j] = d[i - 1][j];
}
}
}
auto rv = d[sn][tn];
for (auto i = 0; i < sn + 1; ++i) delete[] d[i];
delete[] d;
return rv;
}
int num_dc_matches(char* s, char* t, int sn, int tn)
{
if (0 == s || 0 == t) return 0;
if (tn > sn) return 0;
if (tn == sn) return (strcmp(s, t) == 0) ? 1 : 0;
if (0 == tn) return 1;
if (*s == *t)
{
return num_dc_matches(s + 1, t + 1, sn - 1, tn - 1) + num_dc_matches(s + 1, t, sn - 1, tn);
}
else
{
return num_dc_matches(s + 1, t, sn - 1, tn);
}
}
It's a simple DP problem
int NumberOfMatch(string s, string t)
{
int m = s.size(), n = t.size(), *dp[2];
dp[0] = new int[n](); dp[1] = new int[n](); dp[0][0] = s[0] == t[0] ? 1 : 0;
for (int i = 1; i < m; ++i)
{
dp[i & 1][0] = dp[(i - 1) & 1][0] + (s[i] == t[0] ? 1 : 0);
for (int j = 1; j < n; ++j) dp[i & 1][j] = dp[(i - 1) & 1][j] + (s[i] == t[j] ? dp[(i - 1) & 1][j - 1] : 0);
}
int ans = dp[(m - 1) & 1][n - 1]; delete[] dp[0]; delete[] dp[1];
return ans;
}
A dp solution I can think of is:
iterate through string, at each character determine if it completes a match or adds to a match. If it completes a match, save the Indexes (use it display later on). If it adds to an incomplete match save indexes in a separate data structure to use in the case that a later character completes the match. T = O(n^2)
void findDiscontinuousMatches(string s, string p, int slength, intplength)
{
if(slength == 0 || plength == 0 || plength > slength)
return null;
list<string> incompleteMatches;
vector<vector<int>> incompleteIndices;
vector<vecotr<int>> completeIndices;
incompleteMatches.push_front("");
vector<int> placeHolder v;
incompleteIndices.push_back(v);
for(int i = 0; i<slength; i++)
{
auto it2 = incompleteIndices.begin();
for(auto it = incompleteMatches.begin(); it != incompleteMatches.end(); it++)
{
if(it2 = incompleteIndices.end())
break;
//completes a match
if( (it->first + s[i]).equals(p) )
{
vector<int> completedIndex (it2->first);
completedIndex.push_back(i);
completeIndices.push_back(completedIndex);
}
//adds to an incomplete match
if( p.startsWith(it->first + s[i]) )
{
incompleteMatches.push_front(it->first + s[i]);
vector<int> incompleteIndex (it2->first);
incompleteIndex.push_back(i);
completeIndices.push_back(incompleteIndex);
}
it2++;
}
}
printDiscontinuous(s, completeIndices); //Didn't write this method
}
public static int findMatches(String source, String target) {
if(source.length() == 0) {
return 1;
}
if(target.length() == 0)
return 0;
int num = 0;
for(int i = 0; i < target.length() - source.length() + 1; i++) {
if(source.charAt(0) == target.charAt(i)) {
num += findMatches(source.substring(1), target.substring(i+1));
}
}
return num;
}
dp solution
}
- Scott March 04, 2015