Amazon Interview Question
Developer Program EngineersCountry: United States
Actually, thinking about this a bit more, a suffix tree would larger than O(N) space. However, we don't need a full suffix tree to solve this problem. A suffix tree with only the full word branch is necessary so its probably still doable in O(N) space, I'm just not sure how to construct such a suffix tree.
@Some guy
Nice solution! A few ideas that I would like to contribute..
Build a 2 suffix trees T1 and T2.
1. Build T1 for substring(n/3, 2n/3) and T2 for substring(2n/3, n).
2. Look for prefixes of substring(1,n/3) in both the T1 and T2.
3. The longest such prefix of substring(1,n/3) that is present both in T1 and T2 is a border with maximum length and having an occurrence count >=3
@Some guy & Ayahuasca
Nice! However let me challenge your idea of using tries :)
How can you efficiently and correctly solve the "non-overlap" criteria? <Some guy>'s solution does not address the non-overlap criteria between the head string and the middle string (if I understand correctly, at least not in a very efficient way), while <Ayahuasca>'s solution is, pardon me if I am wrong, not correct: the desired string may appear twice in the range (1,n/3) and once in (2n/3,n), right?
#include<stdio.h>
#include<string.h>
int main()
{
char str[]="ababab";
int n,i,j,l,k,count,max=0;
l=strlen(str);
for(i=0;i<(l/2);i++)
{ j=l-1-i;
k=0;
count=0;
while(k<=i&&j<l)
{
if(str[k]==str[j])
count++;
k++;
j++;
}
if(count==(k))
{
if(isasubstring(str,k,l-(2*(k))))
if(max<count)
max=count;
}
}
printf("length of longest border is : %d",max);
return 0;
}
isasubstring(char *a,int s,int n)
{
int i,j;
char *temp;
char *pattern=malloc(sizeof(char)*(s+1));
char *input =malloc(sizeof(char)*(n+1));
memcpy(pattern,a,s);
pattern[s]='\0';
j=0;
for(i=s;i<=s+n-1;i++)
{
input[j]=a[i];
j++;
}
input[j]='\0';
//printf("%s--- %s\n",input,pattern);
temp=strstr(input,pattern);
if(temp)
return 1;
else
return 0;
}
Step 1 : use stack to identify the sample border cases -- Space O(N/2) Time O(N)
Step 2 : if a border exist perform KMP for the identified border and check the stating position .if there are three disctinct starting position print the border
- Space O(max(border.length)) - time O(N);
border identifying can also be done with a reverse string function performing validations from both ends
take two pointers one from start of string and other from end, match the charters, if matched then pass the strings to kmp algorithm where input string will be from index 0 to length -1, if kmp return true then again pass it to kmp where input string will be end point of previous match . and count the matches,
if match occurs three times then this will be the one such string,
then again take more characters and try it. if match found for this then replace it with previous one. when matched failed then last string will be longest string for borders.
why not:
again pass it to KMP and
again pass it to KMP and
again pass it to KMP and
...???
I am using modified version of KMP algorithm as follows:
As the border should have 3 non-overlapping occurrences, the maximum length of this border can be len /3.
1) I am computing LPS array for the largest suffix possible.
The last index of suffix_lps array will tell me the longest suffix border found in the given string.
2) if longest suffix border is 0, then return 0 as there is no suffix that matches prefix.
Else compute second_lps array for the 2nd occurence. The 2nd occurence can happen anywhere from index 1 to index len-2.
3) Then I will look in the second_lps to see if there is any border of length same as suffix border and also that there is no over lap in the index of 2nd border and suffix border.
Here is the code:
int len = instr.length();
int suf_start_idx = (len%2) ? 2*len/3 +1 : 2*len/3;
string suffix = instr.substr(suf_start_idx);
int max_suf_len = len - suf_start_idx;
int suffix_lps[max_suf_len];
int second_lps[len - 1];
string prefix = instr.substr(0, len/3);
computeLPSarray(suffix, max_suf_len, prefix, suffix_lps);
if (suffix_lps[max_suf_len-1] == 0)
ret_val = 0;
else
{
computeLPSarray(instr.substr(1), len-1, prefix, second_lps);
int idx_suf_start_match = len - suffix_lps[max_suf_len-1];
for(int i = 0; i < len-1; i++)
if ((second_lps[i] == suffix_lps[max_suf_len-1]) && (i < idx_suf_start_match))
{
ret_val = second_lps[i];
break;
}
}
cout << ret_val << endl;
suf_start_idx = (len%2) ? 2*len/3 +1 : 2*len/3;
string suffix = instr.substr(suf_start_idx);
pattern = instr.substr(0, len/3);
computeLPSarray(suffix, max_suf_len, pattern, suffix_lps);
Tested following code on linux machine
#include <iostream>
#include <string.h>
using namespace std;
int getBorder(const char* sInput)
{
int nLen = strlen(sInput);
int iFHIdx = nLen/2;
int iStartIdx = -1;
int iEndIdx = nLen-1;
bool flag = false;
while(iFHIdx >= 0)
{
// No match, start looking after iStartIdx - 1;
if( sInput[iEndIdx] != sInput[iFHIdx])
{
if(flag == true)
iFHIdx = iStartIdx - 1;
else
{
iStartIdx = -1;
}
iFHIdx--;
iEndIdx = nLen-1;
flag = false;
}
if(sInput[iEndIdx] == sInput[iFHIdx])
{
if( flag == false)
iStartIdx = iFHIdx;
iFHIdx--;
iEndIdx--;
flag = true;
}
}
char* sBorder = NULL;
if(flag)
{
sBorder = new char[iStartIdx+2];
cout << "Input string:" << sInput << " Border:" << iStartIdx+1 << endl;
memcpy(sBorder, sInput, iStartIdx+1);
cout << "Border: " << sBorder << endl;
}
else
{
cout << "Input string:" << sInput << " Border:" << 0 << endl;
return 0;
}
char* sSubStrStart = strstr(sInput + iStartIdx+1, sBorder);
cout << "String: " << sSubStrStart << endl;
if(sSubStrStart != NULL
&& strlen(sSubStrStart) != iStartIdx+1 )
{
cout << "Sub string found" << endl;
return iStartIdx+1;
}
}
int main()
{
//getBorder("baxxxxxa");
//getBorder("baxxxxxb");
//getBorder("baxxxxba");
getBorder("abcabcabxxxabcabxxxxxxpqrsabcab");
}
Hi,
this is my solution that works for all my tests but not for Codility. Can you please give me an example for which this code is wrong?
public static int solution(String S) {
// The string must be at least 3 chars
if (S == null || S.length() < 3) {
return 0;
}
// We transform the String into an array of char
final char[] input = S.toCharArray();
final int maxCandidateLength = input.length / 3;
// We try to search the first char into all the array
int[] candidateIndexes = new int[input.length];
int candidateNumber = 0;
int lastIndex = 0;
for (int i = 0; i < input.length ; i++ ){
if (input[i] == input[0]) {
candidateIndexes[candidateNumber++] = i;
lastIndex = i;
}
}
// If the number of items is less that 3 we return 0
if (candidateNumber < 3) {
return 0;
}
int maxLength = 0;
// Here we have at least 3 items with 1 char so we set as candidate the 1
if (lastIndex == input.length - 1){
maxLength = 1;
}
// We have to test the others
int offset = 1;
int[] tmpIndexes = new int[candidateNumber];
int tmpNumber = 0;
while (offset <= maxCandidateLength) {
char tocheck = input[offset];
tmpIndexes[tmpNumber++] = 0;
for (int j = offset ; j < candidateNumber; j++) {
int newIndex = candidateIndexes[j] + offset;
if (newIndex >= input.length) {
// Here we have reached the end so we have found a candidate
if (offset > maxLength){
maxLength = offset;
}
continue;
}
if (input[newIndex] == tocheck) {
tmpIndexes[tmpNumber++] = candidateIndexes[j];
}
}
if (tmpNumber < 3) {
// In this case we have finished because we don't have 3 occurrences
break;
}
candidateNumber = tmpNumber;
candidateIndexes = tmpIndexes;
tmpNumber = 0;
offset++;
}
return maxLength;
}
public class ThreeString {
public int solve(String S) {
//modified KMP
int[] pi = new int[S.length()];
int matched = -1;
pi[0] = -1;
for (int i = 1; i < S.length(); i++) {
while (matched != -1 && S.charAt(i) != S.charAt(matched + 1)) {
matched = pi[matched];
}
if (S.charAt(i) == S.charAt(matched + 1)) {
matched++;
}
pi[i] = matched;
}
int possible = pi[S.length() - 1];
while (possible != -1 && possible + 1 > S.length() / 3) {
possible = pi[possible];
}
if (possible == -1) {
return 0;
}
pi = Arrays.copyOf(pi, possible + 1);
// 012345678
// aaaaaaaaa
// 012
matched = -1;
int max = 0;
for (int i = 0; i < S.length(); i++) {
while (matched != -1 && S.charAt(i) != S.charAt(matched + 1)) {
matched = pi[matched];
}
if (S.charAt(i) == S.charAt(matched + 1)) {
matched++;
}
max = Math.max(
max,
Math.min(Math.min(i - matched, matched + 1), S.length() - 1
- i));
if (matched == pi.length - 1) {
matched = pi[matched];
max = Math.max(
max,
Math.min(Math.min(i - matched, matched + 1), S.length()
- 1 - i));
}
}
return max;
}
public static void main(String[] args) {
ThreeString ts = new ThreeString();
String[] S = { "123451234512345", "a", "aa", "aaa", "aaaa", "aaaaa",
"aaaaaa","aaaaaaaa","aaaaaaaaa"};
for (int i = 0; i < S.length; i++) {
System.out.println(S[i]+"= "+ts.solve(S[i]));
}
StringBuilder sb=new StringBuilder("");
for(int i=0;i<200;i++){
sb.append("a");
}
for(int i=0;i<300;i++){
sb.append("b");
}
for(int i=0;i<200;i++){
sb.append("a");
}
System.out.println(ts.solve(sb.toString()));
}
}
Not that you would actually be able to code this in time but an algorithm to solve this in linear time is:
- Some guy July 19, 20131) Construct a suffix tree using Ukkonen algorithm
2) Starting at top, take the branch that starts with the first char in the string.
3) Traverse down, look for nodes that have a branch to end of string and another char.
Whenever you have found one of these node, you have found a border.
4) Find the deepest node that is a border that is less than or equal to length of string divided by 3.
5) That will be the answer.