Amazon Interview Question
Software DevelopersCountry: United States
Interview Type: Phone Interview
In this solution I have assumed it to be substring instead of subsequence
public int[] findMinimalOverlapSubsequence(String str) {
// using this to find the index of each character
Map<Character, Integer> charIndex = new HashMap<>();
// using this to track repeated character sequence
int[] arr = new int[str.length()];
// marking termination
arr[0] = -1;
charIndex.put(str.charAt(0), 0);
// populating possible sequence break points.
for (int i = 1; i < arr.length; i++) {
char ch = str.charAt(i);
if (charIndex.containsKey(ch)) {
arr[i] = charIndex.get(ch);
} else {
arr[i] = i - 1;
charIndex.put(ch, i);
}
}
int end = arr.length - 1;
Stack<Integer> breakPoints = new Stack<>();
while (end >= 0) {
char ch = str.charAt(end);
int beg = charIndex.getOrDefault(ch, end);
// if the current character is unique
if (beg == end) {
breakPoints.push(1);
end = arr[end];
}
// if the current character have multiple occurrence
else {
int minIndex = findMinIndex(arr, beg + 1, end);
// checking if the current sequence is a part of adjacent sequence overlap
while (minIndex < beg) {
int prevBeg = beg;
beg = minIndex;
minIndex = findMinIndex(arr, beg + 1, prevBeg);
}
breakPoints.push(end - arr[minIndex]);
end = arr[minIndex];
}
}
// populating the result matrix
int[] result = new int[breakPoints.size()];
int resultIdx = 0;
while (!breakPoints.isEmpty()) {
result[resultIdx++] = breakPoints.pop();
}
return result;
}
private int findMinIndex(int[] arr, int beg, int end) {
int min = Integer.MAX_VALUE;
while (beg <= end) {
min = Math.min(min, arr[end]);
end--;
}
return min;
}
public static void main(String[] args) {
for (String s : Arrays.asList("abcbaebadfgdfifklmnml", "abc", "abca")) {
System.out.println(s);
int[] result = new MinimalOverlapSubsequence().findMinimalOverlapSubsequence(s);
for (int i : result) {
System.out.print(i + ", ");
}
System.out.println("\n--------------------------------------------");
}
}
Simpler solution with two passes over the string. Time and space complexity: O(n) where n = len(s).
1. First, mark the start and end of each character.
2. Calculate the locations at which the number of starts <= location equals number of ends <= location.
3. Output their differences.
There's a mistake in the question data. The last string's partition is [8, 7, 1, 5] because 'k' is a loner.
import itertools as it
# Outputs the tuple (start, end) where start contains the indices of the first appearance of each
# character in the string s (similarly end).
def apperance_locations(s):
start, end = {}, {}
for i, x in enumerate(s):
if not x in start:
# x hasn't been encountered before
start[x] = i
# Override x's entry in end with ots latest occurrence.
end[x] = i
# Output the locations only.
return set(start.itervalues()), set(end.itervalues())
# Cumulative sum generator of a generator x.
def cumsum(x):
s = 0
for y in x:
s += y
yield s
# Differences of a numerical sequence generator x. Outputs x[0],x[1]-x[0],x[2]-x[1],... .
def diff(x):
y = next(x)
yield y
for z in x:
yield z - y
y = z
# Outputs the list lengths of minimum string partition.
def partition_lengths(s):
start, end = apperance_locations(s)
return list(diff(it.imap(lambda (i, x): i, it.ifilter(lambda (i, x): x == 0, enumerate(cumsum((i in start)\
- (i in end) for i in xrange(len(s))), 1)))))
if __name__ == "__main__":
print partition_lengths('abc') # [1, 1, 1]
print partition_lengths('abca') # [4]
print partition_lengths('abcbaebadfgdfifklmnml') # [8, 7, 1, 5]
Output:
[1, 1, 1]
[4]
[8, 7, 1, 5]
public class Result
{
public char name;
public int inicial;
public int final;
public int total
{
get {
return final == 0 ? 1 : final - inicial+1;
}
}
public Result (char n, int ini)
{
inicial = ini;
name = n;
final = 0;
}
}
static public int[] GetSubSequences(char[] p)
{
Dictionary<char, Result> sub = new Dictionary<char, Result>();
int count = 0;
for (int i = 0; i < p.Length; i++)
{
if (sub.ContainsKey(p[i]))
{
if (sub[p[i]].final == 0) count++;
sub[p[i]].final = i+1;
}
else
{
sub.Add(p[i], new Result(p[i], i+1));
}
}
int[] result;
if (count == 0)
{
result = new int[sub.Count];
for (int i = 0; i < sub.Count; i++)
result[i] = 1;
}
else
{
result = new int[count];
int index = 0;
foreach (var item in sub)
{
if (item.Value.total > 1)
{
result[index] = item.Value.total;
index++;
}
}
}
return result;
}
# input = ['a','b','c','b','a','e','b','a','d','f','g','d','f','i','f','k','l','m','n','m','l']
# input = ['a','b','c','a']
# input = ['a','s','v']
input = "abcbaebadfgdfifklmnml"
# input = "aba"
input.split()
intvl =dict()
for i,x in enumerate(input):
if x in intvl:
intvl[x].append(i)
else:
intvl[x] = [i]
intvl = [y for x,y in intvl.items()]
intvl.sort(key= lambda x: x[0])
out = []
out.append(intvl[0])
print(intvl)
for x in intvl:
if len(out[-1])<2:
if x not in out:
out.append(x)
elif len(x)<2:
if x[0]<= out[-1][1]:
pass
else:
out.append(x)
elif x[0] < out[-1][1]:
tmp = [min(x[0],out[-1][0]),max(x[-1],out[-1][-1])]
out.pop()
out.append(tmp)
else:
out.append(x)
print(out)
out = [x[1] - x[0]+1 if len(x)>1 else 1 for x in out]
print(out)
# input = ['a','b','c','b','a','e','b','a','d','f','g','d','f','i','f','k','l','m','n','m','l']
# input = ['a','b','c','a']
# input = ['a','s','v']
input = "abcbaebadfgdfifklmnml"
# input = "aba"
input.split()
intvl =dict()
for i,x in enumerate(input):
if x in intvl:
intvl[x].append(i)
else:
intvl[x] = [i]
intvl = [y for x,y in intvl.items()]
intvl.sort(key= lambda x: x[0])
out = []
out.append(intvl[0])
print(intvl)
for x in intvl:
if len(out[-1])<2:
if x not in out:
out.append(x)
elif len(x)<2:
if x[0]<= out[-1][1]:
pass
else:
out.append(x)
elif x[0] < out[-1][1]:
tmp = [min(x[0],out[-1][0]),max(x[-1],out[-1][-1])]
out.pop()
out.append(tmp)
else:
out.append(x)
print(out)
out = [x[1] - x[0]+1 if len(x)>1 else 1 for x in out]
print(out)
- shreyas.srivastava March 22, 2018