Amazon Interview Question
Research ScientistsCountry: United States
Interview Type: In-Person
You can also start at the left side, taking the heads of each list and forming an N-wide tuple: (0, 1, 5). Sweep through the tuple values to find the min and max, which will give you the current range, and then choose the array that the min came from and advance it by one position for the next iterations. So, after considering (0,1,5), next consider (3,1,5), then (3,4,5), then (6,4,5). After (6,4,5), there is no way to advance beyond the 4, so you stop, as advancing either of the other letters is only gonna make your range wider.
Suggestion:
Acquire all the arrays for the querry strings from the dictionary into an array(array of arrays - 2D).
Run a n-way sorted merge on the idex values to form an array of Nodes. Use class Node { int index; String value;} //place the values into a new Node every time you add an element to the resultant string.
Once, this is done you may run the 'sliding window' algorithm on the 'sorted array' to find the min length using the indices in the Node class.
Worst case time and space complexity: O(number of occurences of all query strings).
I could use some constructive criticism. :-D
Feel free to point out flaws/improvments. :)
To simplify issue, assuming the dictionary and list are both array of char, just like your example.
bool ShortestSeq(array<char> D, array<char> L)
{
int finalStart = 0;
int finalEnd = 0;
int shortestLength = 0;
int start = -1;
int end = 0;
int j = 0;
for(int i = 0; i < D.length; i++)
{
if(D[i] == L[0])
{
start = -1;
j = 0;
}
if(start == -1 && D[i] = L[j])
{
if(++j >= L.length)
{
finalStart = i;
finalEnd = i;
return true;
}
start = i;
}
else if(start >=0 && D[i] == L[j])
{
if(++j >= L.length)
{
end = i;
int length = end - start;
if(length < shortestLength)
{
finalStart = start;
finalEnd = end;
}
else
{
j = 0;
start = -1; //continue search
}
}
}
}
}
Here's an algorithm I think should work (and in O(N)). It doesn't actually use the information on where characters occur, so there's probably a cleaner way to do it given that info..
What it does is count how often each character occurs, then it goes through the array, subtracting from the count each time it meets a character. If going to the next item would reduce one of the items we're looking for to 0, it stops. It then repeats the procedure from the back, eating its way through the array until one of the item counts we're looking for would become zero. Now we have the minimum span starting from the front.
From the back it could be different, so we repeat the 'eating away' but in the opposite order, eating from the back first and then from the front. Finally you compare the result eating from the front to the one eating from the back and return the smaller one.
Complexity for counting is O(N), each pair of 'eating aways' is O(N), so total of 3*O(N) = O(N).
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public class FindShortestSequence {
public static void main(String[] args) {
char [] sequence = {'a', 'b', 'a', 'c', 'd','x','b','a'};
for(char c : sequence)
{
System.out.print(c + " ");
}
System.out.println();
findShortestSeq(sequence, genSet('a','c','x'));
}
public static Set <Character> genSet(char ... chars)
{
Set <Character> s = new HashSet();
for(char c : chars)
{
s.add(c);
}
return s;
}
public static void findShortestSeq(char [] sequence, Set <Character> words)
{
Map <Character, Integer> wordCounts = new HashMap<Character, Integer>();
//Count up how often we have the word total
for(char curWord : sequence)
{
if(wordCounts.containsKey(curWord))
{
wordCounts.put(curWord,wordCounts.get(curWord)+1);
}
else
wordCounts.put(curWord, 1);
}
//Prune from front first, then from back
Map <Character, Integer> wordCountsCopy = new HashMap(wordCounts);
int charAt = 0;
while(charAt < sequence.length)
{
char curChar = sequence[charAt];
System.out.print("At " + charAt + ", " + curChar + " ");
if(words.contains(curChar) && wordCountsCopy.get(curChar) <= 1)
{
System.out.println(" got " + wordCountsCopy.get(curChar) + ", breaking\n");
break;
}
else
System.out.println(" got " + (wordCountsCopy.get(curChar)-1) + " left");
wordCountsCopy.put(curChar, wordCountsCopy.get(curChar)-1);
charAt++;
}
int startCharFromFront = charAt;
charAt = sequence.length-1;
while(charAt >= startCharFromFront)
{
char curChar = sequence[charAt];
System.out.print("At " + charAt + ", " + curChar + " ");
if(words.contains(curChar) && wordCountsCopy.get(curChar) <= 1)
{
System.out.println(" got " + wordCountsCopy.get(curChar) + ", breaking\n");
break;
}
else
System.out.println(" got " + (wordCountsCopy.get(curChar)-1) + " left");
wordCountsCopy.put(curChar, wordCountsCopy.get(curChar)-1);
charAt--;
}
int endCharFromFront = charAt;
//Prune from back first, then from front
wordCountsCopy = new HashMap(wordCounts);
charAt = sequence.length-1;
while(charAt >= 0)
{
char curChar = sequence[charAt];
if(words.contains(curChar) && wordCountsCopy.get(curChar) <= 1)
{
break;
}
wordCountsCopy.put(curChar, wordCountsCopy.get(curChar)-1);
charAt--;
}
int endCharFromBack = charAt;
charAt = 0;
while(charAt < endCharFromBack)
{
char curChar = sequence[charAt];
if(words.contains(curChar) && wordCountsCopy.get(curChar) <= 1)
{
break;
}
wordCountsCopy.put(curChar, wordCountsCopy.get(curChar)-1);
charAt++;
}
int startCharFromBack = charAt;
System.out.println("From front, smallest is from " + startCharFromFront + " to " + endCharFromFront);
System.out.println("From back , smallest is from " + startCharFromBack + " to " + endCharFromBack);
}
}
Linear Python Algorithm.
(At first glance, this might appear to be N*N, but if you look at the inner loops, you can convince yourself that each inner loop can only run a linear number of times, due to the way they advance start and end.)
def doc_range(D, L):
d = {}
n = 0
for c in D:
for i in D[c]:
d[i] = c
n += 1
cnts = {}
for c in L:
cnts[c] = 0
start = 0
end = -1
letters_counted = 0
min_len = n
while True:
while letters_counted < len(L):
end += 1
if end >= n:
break
c = d[end]
if c not in cnts:
continue
if cnts[c] == 0:
letters_counted += 1
cnts[c] += 1
if end >= n:
break
while True:
c = d[start]
if c not in cnts:
start += 1
continue
cnts[c] -= 1
if cnts[c] == 0:
break
start += 1
l = end + 1 - start
if l <= min_len:
min_range = (start, end)
min_len = l
start += 1
letters_counted -= 1
cnts[d[start]] = 0
return min_range
D = {}
D['a'] = [0,2,7,8]
D['b'] = [1,6]
D['c'] = [3,9]
D['d'] = [4]
D['x'] = [5,10]
L = 'acx'
print doc_range(D, L)
from collections import defaultdict
doc = "abacdxba"
D = defaultdict(list)
for i in range(len(doc)):
D[doc[i]].append(i)
L = ['c', 'd', 'a']
start_i = None
target = 0
best = None
i = 0
while i < len(doc):
if doc[i] == L[0]:
start_i = i
target = 1
elif doc[i] == L[target]:
target += 1
if target == len(L):
if best == None:
best = (start_i, i)
elif i - start_i < best[1] - best[0]:
best = (start_i, i)
target = 0
i+= 1
from collections import defaultdict
doc = "abacdxba"
D = defaultdict(list)
for i in range(len(doc)):
D[doc[i]].append(i)
L = 'acx'
start_i = None
target = 0
best = None
i = 0
while i < len(doc):
if doc[i] == L[0]:
start_i = i
target = 1
elif doc[i] == L[target]:
target += 1
if target == len(L):
if best == None:
best = (start_i, i)
elif i - start_i < best[1] - best[0]:
best = (start_i, i)
target = 0
i+= 1
from collections import defaultdict
doc = "abacdxba"
D = defaultdict(list)
for i in range(len(doc)):
D[doc[i]].append(i)
L = 'acx'
start_i = None
target = 0
best = None
i = 0
while i < len(doc):
if doc[i] == L[0]:
start_i = i
target = 1
elif doc[i] == L[target]:
target += 1
if target == len(L):
if best == None:
best = (start_i, i)
elif i - start_i < best[1] - best[0]:
best = (start_i, i)
target = 0
i+= 1
Ok, I think I figured it out. Now I understand why the interviewer gave me the dictionary of word locations.
- brighama March 28, 2013Example:
If the document is “a b c a b x a x” and the list of words L = [“a”, “b”, “x”]. Then the resulting word position arrays will be:
[0 3 6]
[1 4]
[5 7]
Now imagine that I tell you the right-most position of the optimal span is 7. Then the problem becomes trivial: for each of the three position arrays, just select the highest position that is less than or equal to 7 (in this case, 6, 4, and 7.)
So, given that it’s easy to find the shortest span IF we fix the right-most position, we just need to iterate through all possible right-most positions, find the best span for each, and select the smallest span.
If we use a little care, this can be done in O(sum of lengths of position arrays)