Facebook Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: Phone Interview
code in python, idea is to use kmp on individual entries to get all occurances, then creating a dictionary from these entries to check if the we can find a permutation of the patterns in string
from bisect import bisect_left
def findall(t,p):
pos=list()
val=0
while True:
try:
val=t.index(p,val+1)
pos.append(val)
except:
break
return pos
def check(t,glob,k,num):
keys=glob.keys()
keys.sort()
n=len(keys)
print keys
for i in range(len(keys)):
cover=set(glob[keys[i]])
key=keys[i]
covered=1
for j in range(num-1):
key=key+k
nxt=bisect_left(keys,key)
if nxt<n and keys[nxt]==key:
covered+=1
cover.update(glob[key])
if(len(cover))<j: break
if covered==num and len(cover)==num:
print keys[i],t[keys[i]:keys[i]+num*k]
def search_complex(t,plst):
glob={}
for i in range(len(plst)):
ppos=findall(t,plst[i])
for pos in ppos:
try:
glob[pos].add(i)
except:
glob[pos]=set([i])
check(t,glob,len(plst[0]),len(plst))
def main():
t="lingmindraboofooowingdingbarrwingfooomonkeypoundcakewingdingbarrwingfooowing"
plst=["fooo", "barr", "wing", "ding", "wing"]
search_complex(t,plst)
if __name__=='__main__':
main()
code in python, idea is to use kmp on individual entries to get all occurances, then creating a dictionary from these entries to check if the we can find a permutation of the patterns in string
from bisect import bisect_left
def findall(t,p):
pos=list()
val=0
while True:
try:
val=t.index(p,val+1)
pos.append(val)
except:
break
return pos
def check(t,glob,k,num):
keys=glob.keys()
keys.sort()
n=len(keys)
print keys
for i in range(len(keys)):
cover=set(glob[keys[i]])
key=keys[i]
covered=1
for j in range(num-1):
key=key+k
nxt=bisect_left(keys,key)
if nxt<n and keys[nxt]==key:
covered+=1
cover.update(glob[key])
if(len(cover))<j: break
if covered==num and len(cover)==num:
print keys[i],t[keys[i]:keys[i]+num*k]
def search_complex(t,plst):
glob={}
for i in range(len(plst)):
ppos=findall(t,plst[i])
for pos in ppos:
try:
glob[pos].add(i)
except:
glob[pos]=set([i])
check(t,glob,len(plst[0]),len(plst))
def main():
t="lingmindraboofooowingdingbarrwingfooomonkeypoundcakewingdingbarrwingfooowing"
plst=["fooo", "barr", "wing", "ding", "wing"]
search_complex(t,plst)
if __name__=='__main__':
main()
That's a pretty good hint, but I don't understand is how you can handle one pass in (O(string length) time)
the order of the words in L is arbitrary and we definitely need to check every segment of S following the first match because we could face the following situation:
fooo barr wing ding mehh nota matc hyet fooo wing ding barr wing
We can use well known KMP pattern matching.
Search for the all of the strings in the list L for its starting position and save it. If there are multiple matches save all of them.
Find the current lowest index. As long as the current lowest index + current string length = next lowest index.. move on to the next string and do the same. until you have exhausted all the strings in the list L. If you exhaust all the strings return the starting position of first matching string.
Actually, we even do not need to check the concatenation for this particular task. It is enough to find the MIN element, which is exactly our index.
//took about O(N) where N is the length of the long string
//each word in String[] v must be different!
public void findString(String L,String[] v)
{
Hashtable<String,Integer> table = new Hashtable<String,Integer>();
int all = 0;
for(int i=0;i<v.length;i++)
{
table.put(v[i], i+1);
all += i+1;
}
int sizeV = v[0].length();
int sizeVL = v.length;
int [] o = new int[L.length() - sizeV + 1];
StringBuffer sb = new StringBuffer();
for(int i=0;i<o.length;i++)
{
sb.setLength(0);
for(int j=0;j<v[0].length();j++)
sb.append(L.charAt(i+j));
if( table.containsKey(sb.toString()) )
{
o[i] = table.get(sb.toString());
}else
{
o[i] = 0;
}
}
for(int i=sizeVL*sizeV;i<o.length;i++)
{
int sum = o[i];
for(int j=1;j<sizeVL;j++)
{
sum += o[i - sizeV*j];
}
if( sum == all )
{
System.out.println("find at" + (i - v.length*sizeV));
System.out.println(L.subSequence(i-v.length*sizeV, i+sizeV));
return;
}
}
}
import java.util.*;
import java.io.*;
import java.lang.*;
class stringproject {
public static void main(String[] args)throws IOException
{
String s,f,t;
int z=0;
DataInputStream x=new DataInputStream(System.in);
String[] a={"fooo", "barr", "wing", "ding", "wing"};
String m=Arrays.toString(a);
char[] k=m.toCharArray();
java.util.Arrays.sort(k);
t=process(k);
System.out.println("Enter the string");
s=x.readLine();
try
{
for(int i=0;i<=s.length()-1;i++)
{
f=s.substring(i,i+t.length());
char[] l=f.toCharArray();
java.util.Arrays.sort(l);
if(process(l).equalsIgnoreCase(t))
{
z=1;
}
}
}catch(StringIndexOutOfBoundsException e){}
if(z>0)
{
System.out.println("The given string is present at position");
}
else
{
System.out.println("The given string is not present");
}
}
public static String process(char[] k)
{
StringBuffer n=new StringBuffer();
for(int j=0;j<=k.length-1;j++)
{
if(Character.isLetter(k[j]))
{
n.append(k[j]);
}
}
return n.toString();
}
}
public class SubStringConcatentationTest {
private static String[] L = {"fooo", "barr", "wing", "ding", "wing"};
private static String S = "lingmindraboofooowingdingbarrwingmonkeypoundcake";
public static void main(String[] args) {
Map<String, Integer> lastFoundIndexOf = new HashMap<String, Integer>();
Integer minStartIndex = null, maxEndIndex = null;
int allLengthSum = 0;
for(String l : L) {
int i = S.indexOf(l, lastFoundIndexOf.get(l) == null ? 0 : lastFoundIndexOf.get(l) + l.length());
if(i < 0) {
System.out.println("Not found at all.");
return;
}
lastFoundIndexOf.put(l, i);
allLengthSum += l.length();
if(minStartIndex == null || minStartIndex > i)
minStartIndex = i;
if(maxEndIndex == null || maxEndIndex < i + l.length())
maxEndIndex = i + l.length();
}
if(allLengthSum > 0 && maxEndIndex != null && (allLengthSum == (maxEndIndex - minStartIndex)))
System.out.println("Found at start index: " + minStartIndex + ", substring = " + S.substring(minStartIndex, maxEndIndex));
else {
System.out.println("Not found contigously.");
}
}
}
for(i=0; i<S.length ; i++)
for(j=0; j<L.length; j++)
if(!strncmp(&S[i],L[j]))
{
printf("%s", L[j]);
L[j]=""; // don't revisit
i+=4; // next word
}
public int GetPositionStringContainsMultipleStrings(string str, string[] substrings)
{
if (String.IsNullOrEmpty(str)) return -1;
if (substrings == null || substrings.Length == 0) return 0;
var positions = new SortedDictionary<int, string>();
foreach (var substring in substrings)
{
int index = 0;
string copy = str;
while (index < copy.Length)
{
index = copy.IndexOf(substring, index);
if (index == -1)
{
break;
}
positions.Add(index, substring);
copy = copy.Substring(index + substring.Length);
}
}
if (positions.Count < substrings.Length) return -1;
for (int i = 0; i < positions.Count; i++)
{
var keys = new List<int>();
int key = positions.Keys.ElementAt(i);
keys.Add(key);
for (int j = 1; j < substrings.Length; j++)
{
key += substrings[0].Length;
if (positions.ContainsKey(key))
{
keys.Add(key);
}
}
if (keys.Count == substrings.Length &&
IsDifferent(keys.ToArray(), positions))
{
return positions.Keys.ElementAt(i);
}
}
return -1;
}
private bool IsDifferent(int[] keys, SortedDictionary<int, string> positions)
{
for (int i = 0; i < keys.Length - 1; i++)
{
for (int j = i + 1; j < keys.Length; j++)
{
if (positions[keys[i]] == positions[keys[j]])
{
return false;
}
}
}
return true;
}
public int GetPositionStringContainsMultipleStrings(string str, string[] substrings)
{
if (String.IsNullOrEmpty(str)) return -1;
if (substrings == null || substrings.Length == 0) return 0;
var positions = new SortedDictionary<int, string>();
foreach (var substring in substrings)
{
int index = 0;
string copy = str;
while (index < copy.Length)
{
index = copy.IndexOf(substring, index);
if (index == -1)
{
break;
}
positions.Add(index, substring);
copy = copy.Substring(index + substring.Length);
}
}
if (positions.Count < substrings.Length) return -1;
for (int i = 0; i < positions.Count; i++)
{
var keys = new List<int>();
int key = positions.Keys.ElementAt(i);
keys.Add(key);
for (int j = 1; j < substrings.Length; j++)
{
key += substrings[0].Length;
if (positions.ContainsKey(key))
{
keys.Add(key);
}
}
if (keys.Count == substrings.Length &&
IsDifferent(keys.ToArray(), positions))
{
return positions.Keys.ElementAt(i);
}
}
return -1;
}
private bool IsDifferent(int[] keys, SortedDictionary<int, string> positions)
{
for (int i = 0; i < keys.Length - 1; i++)
{
for (int j = i + 1; j < keys.Length; j++)
{
if (positions[keys[i]] == positions[keys[j]])
{
return false;
}
}
}
return true;
}
#include<iostream>
#include<string>
using namespace std;
int Find_Index( string L[] , string S , int n )
{
int index= 100 , current;
for( int i=0 ; i<n ; i++ )
{
int current = S.find(L[i]);
if( current < index )
index= current;
}
return index;
}
int main()
{
string l[5] ={ "fooo", "barr", "wing", "ding", "wing"};
string s = "lingmindraboofooowingdingbarrwingmonkeypoundcake";
int a = Find_Index(l , s , 5);
cout << a;
return 0;
}
an O(sizeof(long string) + sizeof(small string) * length-of-each-small-string )) solution lives here:
computationalpuzzles.wordpress.com/2011/11/17/substring-with-concatenation-of-all-words-in-a-list/
but im highly suspicious if this is really a phone interview prob. The algorithm may be too hard to write in 45 minutes.
I've got an easy one.
candidates = set(["fooo", "barr", "wing", "ding", "wing"])
seq = "lingmindraboofooowingdingbarrwingmonkeypoundcake"
for i in range(4):
j = i
while j < len(seq):
sub = seq[j: j + 4]
if sub in candidates:
mp = {sub: 1}
k = j + 4
while k < len(seq) - 4 and k < len(candidates) * 4 + j:
sub = seq[k: k + 4]
if sub in candidates and sub not in mp:
mp[sub] = 1
else:
break
if len(mp) == len(candidates):
print seq[j:k + 4]
break
k += 4
j += 4
L: "fooo", "barr", "wing", "ding", "wing".
S: "lingmindraboofooowingdingbarrwingmonkeypoundcake".
Using brute-force hash based solution:
1. Hash all words in L. (hash value indicate number of occurrence)
2. For each substring of S with length (Number_of_words) * (word_length)
2.1 initialize visit count of the hashtable (and a global visit count)
2.2 for each possible word check and update visit count of the hashtable entry, and decrease global visit count
2.3 a match is fount when global visit count reaches 0
Total cost is O(m * n * l), where m is length of the string, n is number of words, l is length of word.
Can be reduced to O(m * l) with optimization.
def _get_subset_of_len_k(s, k):
ret = []
for i in range(0, len(s)):
curr = []
j = i
while j < len(s):
curr.append((s[j:j+4], j))
j+=4
ret.append(curr)
return ret
#Given a list of words, L, that are all the same length, and a string, S, find the starting position of the substring of S that is a concatenation of each word in L exactly once and without any intervening characters. This substring will occur exactly once in S..
def get_sub_str(s, words):
map = {w:"" for w in words}
ret = _get_subset_of_len_k(s, 4)
start = None
for l in ret:
start = -1
match = 0
for w, idx in l:
if w in map:
if start == -1:
start = idx
match +=1
else:
match = 0
start = -1
if match == len(words):
return start
return None
public void findString(String L , String[] V){
Hashtable<String , Integer > ht = new Hashtable<String ,Integer>();
int unitNum = V.length;
if(unitNum == 0) return;
int unitLen = V[0].length();
for(int i = 0 ; i < V.length ; i++){
for(String t : v) ht.put(t , -1);
stringSearch(L , i , ht , unitNum , unitLen);
}
}
public void stringSearch(String L , int start , Hashtable<String , Integer> ht , int unitNum , int unitLen){
int curNum = 0 , begin = 0;
while( start+unitLen <= L.length() ){
String tem = L.substring(start, start+unitLen);
if(!ht.containsKey(tem)){
curNum = 0 ;
begin = start+unitLen;
}
else{
int t = ht.get(tem);
if(t >= begin){
curNum -= (t - begin )/unitLen +1;
begin = t+unitLen;
}
ht.put(tem , start);
start+=unitLen;
curNum++;
}
if(curNum == unitNum){
System.out.println(L.substring(begin , start));
}
}
}
JavaScript implementation
1. Find anagram of all words merged together O(n * log(k))
2. Check that anagram is actually combination of searched words
function getPosition(str, words, wordLen) {
var result = -1;
/* create anagram and count its characters */
var anagram = words.join('').split('');
var anagramHash = {};
var dueMatches = 0; /* number of matching characters */
for (var index = 0; index < anagram.length; index += 1) {
var char = anagram[index];
if (anagramHash.hasOwnProperty(char)) {
anagramHash[char] -= 1;
} else {
anagramHash[char] = -1;
dueMatches += 1;
}
}
/* create words hash */
var wordsHash = {};
var dueWords = 0;
for (var index = 0; index < words.length; index += 1) {
var word = words[index];
if (wordsHash.hasOwnProperty(word)) {
wordsHash[word] -= 1;
} else {
wordsHash[word] = -1;
dueWords += 1;
}
}
var index = 0;
var anaLen = 0;
var matches = 0;
while (index < str.length) {
var char = str.substr(index, 1);
anaLen += 1;
if (anagramHash.hasOwnProperty(char)) {
if (anagramHash[char] == 0) {
matches -= 1;
}
anagramHash[char] += 1;
if (anagramHash[char] == 0) {
matches += 1;
}
}
if (anaLen > anagram.length) {
var char = str.substr(index - anagram.length, 1);
if (anagramHash.hasOwnProperty(char)) {
if (anagramHash[char] == 0) {
matches -= 1;
}
anagramHash[char] -= 1;
if (anagramHash[char] == 0) {
matches += 1;
}
}
anaLen -= 1;
}
if (anaLen == anagram.length) {
if (matches == dueMatches) {
/* check that our substring contains required words */
var wordsHash2 = {};
for (var word in wordsHash) {
wordsHash2[word] = wordsHash[word];
}
var wordMatches = 0;
for (var wordIndex = 0; wordIndex < words.length; wordIndex += 1) {
var word = str.substr(index - anagram.length + 1 + wordIndex * wordLen, wordLen);
if (wordsHash2.hasOwnProperty(word)) {
wordsHash2[word] += 1;
if (wordsHash2[word] == 0) {
wordMatches += 1;
}
}
}
if (wordMatches == dueWords) {
result = index - anagram.length + 1;
break;
}
}
}
index += 1;
}
return result;
}
var result1 = getPosition("lingmindraboofooowingdingbarrwingmonkeypoundcake", ["fooo", "barr", "wing", "ding", "wing"], 4); // 13
var result1 = getPosition("monkey", ["mon", "key"], 3); // 0
var result3 = getPosition("abcdfecdba", ["a", "b", "c", "d", "e"], 1); // 5
import java.util.HashMap;
import java.util.Map;
public class FindWordCombination {
public FindWordCombination() {
}
public String solve(String[] L, int k, String target){
if(target == null || L == null){
throw new IllegalArgumentException();
}
if(L.length == 0){
return "";
}
Map<String, Integer> dictionary = new HashMap<String,Integer>();
for(String s : L){
int freq = 1;
if(dictionary.containsKey(s)){
freq = dictionary.get(s);
freq = freq + 1;
}
dictionary.put(s,freq);
}
int m = L.length;
int n = target.length();
//char sc[] = target.toCharArray();
int start = -1;
for(int i = 0; i < k; i++){
start = i;
int numMatches = 0;
boolean misMatch = false;
Map<String, Integer> wordFreq = (new HashMap<String,Integer>(dictionary));
while((numMatches < m) && ((start+k) < n)){
String ss = target.substring(start, start+k);
misMatch = !wordFreq.containsKey(ss);
if(!misMatch){
// decrement frequency
int freq = wordFreq.get(ss);
if(freq > 1){
wordFreq.put(ss, freq-1);
} else {
wordFreq.remove(ss);
}
numMatches++;
} else {
if(numMatches > 0){
wordFreq = (new HashMap<String,Integer>(dictionary));
}
}
//System.out.println("DEBUG " + ss + " " + numMatches);
start=start+k;
}
if(numMatches == m){
start = start - k*m;
break;
}
}
if(start < 0){
System.err.println("No solution exists");
return null;
}
String res = target.substring(start, start+k*m);
return res;
}
public static void main(String[] args){
FindWordCombination wrapper = new FindWordCombination();
String L[] = {"fooo", "barr", "wing", "ding", "wing"};
String s = "lingmindraboofooowingdingbarrwingmonkeypoundcake";
{
String res = wrapper.solve(L, 4, s);
System.out.println("input:\t" + s);
System.out.println("result:\t" + res);
}
}
}
the idea is to create a trie of Trie(or Ternary tree) of L.
Now start with index i = 0 of S.
1. search for 4 letters(or whatever the length of words) at a time in the trie. if you dont find a match, then increment i by 1 and continue searching
2. if u find a match increment i by 4 , and mark the entry as 'M' (match).
3. stop parsing the string when you get MMMM(4 matches ) , return the index of the first M
complexity = O(len(S)*log(len(word)))
Comments/corrections are invited.
I don't think anybody solved this problem easier than this. I am returning the index where I got substring.
public static List<Integer> findSubstring(String s, String[] words) {
Map<String, Integer> map = new HashMap<>();
int left = 0, right = 0;
List<Integer> res = new ArrayList<>();
int wordSize = words[0].length();
for(String str: words)
map.put(str, 1);
int i =0, temp;
while(right < s.length()- wordSize + 1) {
if(map.containsKey(s.substring(right, right + wordSize))){
for(temp = right, i =0; temp + wordSize < s.length() && map.containsKey(s.substring(temp, temp+wordSize)); temp+=wordSize, i++){
// System.out.println("got match");
}
if(i == words.length){
res.add(right);
}
right=temp;
}else
right++;
}
return res;
}
public static List<Integer> findSubstring(String s, String[] words) {
Map<String, Integer> map = new HashMap<>();
int left = 0, right = 0;
List<Integer> res = new ArrayList<>();
int wordSize = words[0].length();
for(String str: words)
map.put(str, 1);
int i =0, temp;
while(right < s.length()- wordSize + 1) {
if(map.containsKey(s.substring(right, right + wordSize))){
for(temp = right, i =0; temp + wordSize < s.length() && map.containsKey(s.substring(temp, temp+wordSize)); temp+=wordSize, i++){
// System.out.println("got match");
}
if(i == words.length){
res.add(right);
}
right=temp;
}else
right++;
}
return res;
}
1. I think we can construct a suffix tree for the input text S. Space and Time required for construction of suffix trees is linear.
2. Once the suffix tree is constructed, we start with each word in L and check if all words are a part of a prefix of a suffix, or its a suffix entirely in the suffix tree for S.
as per i think just create the all possible permutation of the L and save it in any extra place.now one by one search the every permutation in the string source S as u find stop searching and u will get the locatin if u are using the kmp the trhe searching time will be O(len) where len is the lenghth of the over all words length .now the totac complexity in worst case will be O(k!*len)
where k is the total number of words in L.
For finding position
import java.util.*;
import java.io.*;
import java.lang.*;
class stringproject {
static int p=0;
public static void main(String[] args)throws IOException
{
String s,f,t;
int z=0;
DataInputStream x=new DataInputStream(System.in);
String[] a={"fooo", "barr", "wing", "ding", "wing"};
String m=Arrays.toString(a);
char[] k=m.toCharArray();
java.util.Arrays.sort(k);
t=process(k);
System.out.println("Enter the string");
s=x.readLine();
try
{
for(int i=0;i<=s.length()-1;i++)
{
f=s.substring(i,i+t.length());
char[] l=f.toCharArray();
java.util.Arrays.sort(l);
p++;
if(process(l).equalsIgnoreCase(t))
{
z=p;
}
}
}catch(StringIndexOutOfBoundsException e){}
if(z>0)
{
System.out.println("The given string is present at position"+(z));
}
else
{
System.out.println("The given string is not present");
}
}
public static String process(char[] k)
{
StringBuffer n=new StringBuffer();
for(int j=0;j<=k.length-1;j++)
{
if(Character.isLetter(k[j]))
{
n.append(k[j]);
}
}
return n.toString();
}
}
The implementation of solving the problem is a little complex.My idea is
1. according to L, permutation and concat all the strings, e.g.
L: "fooo", "barr", "wing" => L: A="fooo", B="barr", C="wing"
generate: (if the number of strings in L is m, words length is l, the time in this step is O(m!)
ABC = "fooobarrwing"
ACB = "fooowingbarr"
......
then generate "next array" for every string (which will be used to KMP find)
the time is O(m*l)
2. use KMP algo the search every string above in S. (the length of S is n)
kmp time complex is O(m*l + n)
so the whole time complex is O(m! + m*l + n)
This problem is easier than it appears. We can solve it in O(word length * string length) time.
- fentoyal January 13, 2012I just give a hint here, because it shows it is from topcoder.
Since all the words have the same length, we can conduct the search for length-of-word passes, with each pass offsetting 1 more character.
e.g. Given:
lingmindraboofooowingdingbarrwingmonkeypoundcake
we do it in 4 passes, spaces shows how we treat the strings.
1: ling mind rabo ofoo ..
2: ingm indr aboo fooo
3: ngmi ndra ..
4: gmin drab ...
You will find it is easy to handle 1 pass (O(string length) time), The remaining is trivial too.