Amazon Interview Question
Software Engineer / DevelopersCountry: India
Has anyone even tried to run this code... Where do I begin...
First Dictionary is an "obsolete" DS as defined by Java, so I would use another kind of map like a Hash.
Next you have to use wrapper classes not the primitive when defining a map i.e. HashMap<Character, Integer> not <char, int>
Also if you want to index into a string you either have to turn it into an array str.toCharArray() or say str.charAt(3)
(an extra note not pertaining to this question, strings are immutable so if you are going to try to modify the string at different indices it is best to make it a char array, if you don't every time you modify a value Java is creating a completely new String EVERY time you change one letter of the string, which turns a simple O(1) operation into O(n).)
Then you are indexing the map (dictionary) as though it was an array charIdx[s[ii]]... maps have functions for access so you would use .get in this instance
Again the hash is accessed like an array, to put a value in a hash you have to use .put(key, value), not charIdx[s[ii]]=ii.
The logic does seem correct but it is so convoluted with so many syntax errors it is difficult to trace. It is better to write your algorithm in words, or psuedo code then having it cluttered with errors. Although your algorithm is correct this solution will definitely not run, so I fixed all these errors:
public static void FindMaxUniqueString(String s){
HashMap<Character, Integer> charIdx =
new HashMap<Character, Integer>();
int maxLen = 0;
int maxStart = 0;
int start = 0;
for (int ii = 0; ii < s.length(); ii++){
//new char is not in current unique substring
if (!charIdx.containsKey(s.charAt(ii)) || charIdx.get(s.charAt(ii))<start){
charIdx.put(s.charAt(ii), ii);
if (maxLen < ii-start+1){
maxLen = ii-start+1;
maxStart = start;
}
}
//dup char exist in current unique substring, abandon chars before
//dup char and update dup char idx
else{
start = charIdx.get(s.charAt(ii)) + 1;
charIdx.put(s.charAt(ii), ii);
}
}
for (int ii = 0; ii < maxLen; ii++){
System.out.print(s.charAt(maxStart + ii));
}
}
//dup char exist in current unique substring, abandon chars before //dup char and update dup char idx
else{
start = charIdx.get(s.charAt(ii)) + 1;
charIdx.put(s.charAt(ii), ii);
}
where the characters before the dup are getting abandoned?
Given a string such as:
asderdfstringydeflep
find the longest sequence/substring with no repeating letters, also known as having all unique chars. If we capitalize all the duplicates in the string above it makes it a little more clear.
a S D E r D F S t r i n g y D E F l E p
longest would be: fstringyde OR stringydef
If you want to check a working solution copy and paste the code from the comment on the first response. I cleaned up the original so that you can just drop it in an IDE and run it.
void findLongest(char str[40])
{
int prev_in=0, curr_len=0, max_len=0,start_in=0,i=0;
int visited[256]={0};
for(i=0; i<256; i++)
visited[i]=-1;
visited[str[0]]=0;
for(i=1;str[i]!='\0';i++)
{
prev_in=visited[str[i]];
cout<<"\n"<<prev_in;
if(prev_in==-1 || i-curr_len>prev_in)
curr_len++;
else
{
if(curr_len>max_len)
{
max_len=curr_len;
start_in=i-curr_len;
}
curr_len= i-prev_in;
}
visited[str[i]]=i;
}
//if last case
if (curr_len > max_len)
{
max_len = curr_len;
start_in=i-curr_len;
}
cout<<"\n\n Longest String length:"<<max_len;
cout<<"\n Longest string: ";
for(i=start_in;i<(start_in+max_len);i++)
cout<<str[i];
}
I think we can save space... algorithm might be o(n) (I didnt see it completely)... but i feel its space inefficient... lets try to modify the same..
algorithm seems to be O(n) but can you please explain the reason for taking visited array of size 256.... i think we can work out by only taking it of size 26 if we assume that only alphabets will be entered ...
or of size 52 if we consider upper and lower case as different
also someone do correct me if you think that algorithm is not O(n)
#include<stdio.h>
struct results
{
unsigned int idx;
unsigned int cnt;
};
char* arr="ab1234567890apqqrstppsxaabcdefgh";
int main()
{
unsigned int idx1,idx2,idx3;
unsigned int count,countb;
struct results data;
idx1=idx2=idx3=0;
count=1;
data.idx=0;
data.cnt=1;
printf("%s\n",arr);
while(arr[idx3++]!='\0')
{
for(;idx2<idx3;)
{
if(arr[idx2++]!=arr[idx3])
{
++count;
}
else
{
if(count>data.cnt)
{
data.cnt=count;
data.idx=idx1;
}
idx1=idx2;
break;
}
}
countb=count-1;
count=(idx2==idx3)?1:(idx3-idx1);
idx2=idx1;
}
if(countb>data.cnt)
{
data.cnt=countb;
data.idx=idx1;
}
printf("index=%u,count=%u\n",data.idx,data.cnt);
return 0;
}
int hash(char ch)
{
static int a[26];
for(int i=0;i<26;i++)
{ a[i]=0;return 1;
}
if(a[ch-97]==0)
{ a[ch-97]=1;
return 1;
}
else
return 0;
}
void longestSubString(char str[])
{
int count=0,tmp_cnt=0;
int start=0,end=0;
for(int i=0;str[i];i++)
{
if(str[i]>64 && str[i]<91)
str[i]+=32;
if(hash(str[i]))
count++;
else
{
if(tmp_cnt<count)
{
tmp_cnt=count;
end=i-1;
}
hash('1'); //for clearing the hash table
start=i;
count=0;
}
}
if(tmp_cnt<count)
{
tmp_cnt=count;
end=i-1;
}
cout<<"Length="<<tmp_cnt<<endl;
for(i=(end-tmp_cnt+1);i<=end;i++)
cout<<str[i];
}
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define base 97
struct bucket {
int present;
int prev_index;
};
struct bucket buck[26];
int check_max_unique(char *a,int num){
int i,j,max_count,count;
max_count = 0;
count = 0;
for(i = 0; i < num; i++){
if(!buck[(int)a[i]-base].present){
count++;
buck[(int)a[i]-base].present = 1;
buck[(int)a[i]-base].prev_index = i;
}
else{
max_count = count > max_count ? count : max_count;
if((i - buck[(int)a[i]-base].prev_index) > count){
count++;
}
else{
count = (i - buck[(int)a[i]-base].prev_index);
}
buck[(int)a[i]-base].prev_index = i;
}
printf("%c %d %d\n",a[i],count,max_count);
}
printf("\n");
return count > max_count ? count : max_count;
}
int main(){
char a[] = "abababababaabsdfababab";
int i;
for(i = 0; i < 26; i++){
buck[i].present = 0;
buck[i].prev_index = -1;
}
printf("%d\n",check_max_unique(a,strlen(a)));
return 0;
}
Worked fine with the limited no. of test case I checked with.....
Do inform if you find a case in which it gives unexpected output.
Also I have just counted the length of the max unique substring....
The string can be printed easily by storing the pointer to the present character - max_count every time max_count gets updated....Sorry for being lazy....
char s[]="avwesdfaeqodlcpzsctpple";
int i,count,temp,number;
int table[26]={0};
i=0;
number=0;
while(s[i]!='\0'){
temp = s[i]-'a';
i++;
if (table[temp]==0){
table[temp]++;
number ++;
}
}
printf("%d ", number);
is it subsequence or substring???
for substring it is very easy, use counting sort...
for subsequence in minimal complexity go with some special algo such as KMP.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
void main()
{
char s[] = "aaaafghaksdljweojlaskdjhiiuaaaaaaaa";
unsigned char e[26];
int p = 0, max = 0, b =0;
memset(e, 0, 26);
while(p<strlen(s))
{
if(e[s[p]-'a'] == 0)
{
e[s[p]-'a'] ++;
p++;
}
else
{
max = MAX (max,(p-b));
b = p;
memset(e, 0, 26);
}
}
max = MAX (max,(p-b));
printf("s:%s\nmax:%d\n",s,max);
}
You algo is wrong. For you input string max length substring with unique characters is fghaksdljweo , length = 12
Moreover, although you feel that your algo is O(n) but it is actually O(n^2) because you are using strlen(s) in loop, which itself is a O(n) algo. This information is not stored somewhere, it is calculated every time you make a call to strlen.
Thanks Ghost! Have tried to correct per your suggestions.
char s[] = "aaaafghaksdljweojlaskdjhiiuaaaaaaaa";
unsigned char e[26], c ;
int p = 0, max = 0, b =0, n = 0 ;
memset(e, 0, 26);
n = strlen(s);
while(p<n)
{
if(e[s[p]-'a'] == 0)
{
e[s[p]-'a'] ++;
p++;
}
else
{
max = MAX (max,(p-b));
if(1 == (p-b))
b = p;
else
p = ++b;
memset(e, 0, 26);
}
}
max = MAX (max,(p-b));
printf("s:%s\nmax:%d\n",s,max);
}
Output:
s:aaaafghaksdljweojlaskdjhiiuaaaaaaaa
max:12
public static String longestUniqueSubstring(String input) {
Set<Character> uniquSet = new HashSet<Character>();
int finalStartIndex = 0;
int finalEndIndex = 0;
int startIndex = 0;
int endIndex = 0;
int maxLength = Integer.MIN_VALUE;
char[] characters = input.toCharArray();
uniquSet.add(characters[startIndex]);
for (int i = 1; i < characters.length; i++) {
if(!uniquSet.add(characters[i])) {
if(uniquSet.size() > maxLength) {
maxLength = uniquSet.size();
finalStartIndex = startIndex;
finalEndIndex = endIndex;
}
uniquSet.clear();
startIndex = endIndex = i;
uniquSet.add(characters[i]);
}
else {
endIndex = i;
}
}
char[] resultArr = new char[finalEndIndex - finalStartIndex + 1];
for (int i = finalStartIndex; i <= finalEndIndex; i++)
resultArr[i - finalStartIndex] = characters[i];
return new String(resultArr);
}
I ran this solution with the string: asderdfstringydeflep
your solutions output: dfstringy
correct output: stringydef OR fstringyde
Here are the ranges between the duplicate letters
S 1-7
D 2-5-14
E 3-15
R 4-9
F 6-16
At first glace, and what your algorithm does, is look to the largest range being E 3-15, then it checks to see if there are any smaller range limitations within it which would be D 5-14. But by discarding F 6-16 the correct range is throw out because it was not the largest to start with. I think the correct solution would keep track of all the ranges, then do a check between all of the longest ranges to see what their inner range is.
i.e. Once we do the check with E 3-15 and find a shorter range constraint D 5-14, we should then check to see if we have any other ranges that are larger then 5-14 and check those until there are no more ranges larger then what we have found.
The problem with this solution is it only checks the largest range and doesn't take into consideration just because it has the largest range to start, it does not mean it doesn't contain a smaller range that constrains it.
Also if you used the string: asderdfstringydeflepqzwvmkubcj
The longest sequence is at the end, and is NOT contained within any range of repeated letters
correct solution: flepqzwvmkubcj
but your solution still gives: dfstringy
public class StringTest {
static int[] chars = new int[] {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
public static void main(String... args) {
String s = "asderdfstringydeflepqzwvmkubcj";
int start = 0, end = 0, j = 0;
for(int i = 0; i < s.length(); i++) {
int charIndex = s.charAt(i) - 'a';
if(chars[charIndex] != -1) {
int t = chars[charIndex];
if(j <= t) {
j = t + 1;
}
}
chars[charIndex] = i;
if((i - j) > (end - start)) {
start = j;
end = i;
}
}
System.out.println(s.substring(start, end + 1));
}
}
//a C# implementation.
const int MaxChars = 26;
const int MinChar = 'a';
public String FindMaxSubString(String inputStr)
{
if (null == inputStr)
{
return inputStr;
}
int[] chars = new int[MaxChars]; //Temporary helper array
int start = 0, maxStart = 0, maxLen = 0;
int len, charIndex;
for (charIndex = 0; charIndex < MaxChars; charIndex++)
{
//Initialize to a value outside valid array Indexes
chars[charIndex] = -1;
}
for (int current = 0; current < inputStr.Length; current++)
{
charIndex = inputStr[current] - MinChar;
//check for IndexOutOf range
if (chars[charIndex] >= start)
{
len = current - start;
if (maxLen < len)
{
maxLen = len;
maxStart = start;
}
start = chars[charIndex] + 1;
}
chars[charIndex] = current;
}
//check the last streatch of characters in the string
len = inputStr.Length - start;
if (maxLen < len)
{
maxLen = len;
maxStart = start;
}
return inputStr.Substring(maxStart, maxLen);
}
public static void findMaxUniqueString(String s){
HashMap<Character, Integer> charIdx =new HashMap<Character, Integer>();
int maxLen = 0;
int maxStart = 0;
int start = 0;
for (int i = 0; i < s.length(); i++){
if (!charIdx.containsKey(s.charAt(i))){
int lenghtOfUniqueString = i - start +1 ;
if (maxLen < lenghtOfUniqueString){
maxLen = lenghtOfUniqueString;
maxStart = start;
}
}
else{
start = charIdx.get(s.charAt(i)) + 1;
}
charIdx.put(s.charAt(i), i);
}
for (int i = 0; i < maxLen; i++){
System.out.print(s.charAt(maxStart + i));
}
}
}
I executed your program for following input: "asderdfstoingydeflep" and got the incorrect result "derdfstoingy"
Here is O(N^2) algorithm
public static string GetUniqueSubstring(string source)
{
if(string.IsNullOrEmpty(source)) return source;
HashSet<char> characters = new HashSet<char>();
int uStartIndex=0, uEndIndex=0,cStartIndex=0,cEndIndex=-1;
int j=0;
for (int i = 0; i < source.Length; i++)
{
characters.Clear();
j=cStartIndex=cEndIndex=i;
while(j<source.Length && !characters.Contains(source[j]))
{
cEndIndex=j;
characters.Add(source[j]);
j++;
}
if (cEndIndex - cStartIndex > uEndIndex - uStartIndex)
{
uStartIndex = cStartIndex;
uEndIndex = cEndIndex;
}
}
return source.Substring(uStartIndex, uEndIndex + 1 - uStartIndex);
}
int test20()
{
const char szTest[] = "aaaafghaksdxljwedojlzacskdjhiiuaaaaaaaa";
int maxLen = 0;
int lastIndex = -1;
char szTmp[26] = {0x00};
for( size_t i = 0; i < sizeof(szTest); i++ )
{
char chr = szTest[i];
size_t index = chr - 'a';
if( szTmp[index] == 0x00 )
{
szTmp[index] = chr;
}
else
{
if( maxLen < i - lastIndex && lastIndex > 0 )
{
maxLen = i - lastIndex;
}
lastIndex = i;
::memset( szTmp, 0x00, sizeof(szTmp) );
szTmp[index] = chr;
}
}
cout<<"max length is " << maxLen <<endl;
return(0);
}
public class MaxUniqueString {
public static void main(String[] args) {
String str = "asderdfstringydeflepqzwvmkubcj";
String ans = "";
String temp = "";
for(int i=0; i<str.length(); i++) {
String curr = str.charAt(i) + "";
if(temp.contains(curr)) {
temp = temp.substring(temp.indexOf(curr)+1) + curr;
} else {
temp = temp + curr;
}
if(temp.length() > ans.length()) {
ans = temp;
}
}
System.out.println(ans);
}
}
The above code gives right output: flepqzwvmkubcj
I have tried it with different inputs, and able to see right output getting generated.
public static String maxSubstring(String s) {
Map<Character, Integer> map = new HashMap<Character, Integer>();
int maxLen, maxPointer, len, pointer;
maxLen = maxPointer = len = pointer = 0;
for (int i = 0; i < s.length(); i++) {
if (map.containsKey(s.charAt(i))) {
int position = map.get(s.charAt(i));
if (position >= pointer) {
pointer = position + 1;
}
}
map.put(s.charAt(i), i);
if (i + 1 - pointer > maxLen) {
maxLen = i + 1 - pointer;
maxPointer = pointer;
}
}
return s.substring(maxPointer, maxLen + maxPointer);
}
O(n) (even though it has a nested loop, it only traverses each element thrice in the worst case. 3n = O(n).
#include <iostream>
#include <algorithm>
#include <string>
using namespace std;
inline int key(char& ch) {
return ch - 97;
}
string longest_unique_substr(string str) {
int arr[26];
fill_n(arr, 26, 0);
int substr_start = 0, substr_len = 0, highest_start = 0, highest_len = 0;
for (size_t i = 0; i < str.length(); i++) {
int k = key(str[i]);
if (arr[k] != 0) {
if (substr_len > highest_len) {
highest_start = substr_start;
highest_len = substr_len;
}
int pos = arr[k];
int ign_offset = substr_start + pos;
for(size_t j = substr_start; j < substr_start + substr_len; j++) {
if (j < ign_offset)
arr[key(str[j])] = 0;
else
arr[key(str[j])] -= pos;
}
substr_len -= pos;
substr_start += pos;
}
arr[k] = ++substr_len;
}
if (substr_len > highest_len) {
highest_start = substr_start;
highest_len = substr_len;
}
return str.substr(highest_start, highest_len);
}
int main(int argc, char** argv) {
string str = "abcedefhdikhj";
transform(str.begin(), str.end(), str.begin(), ::tolower);
string substr = longest_unique_substr(str);
cout << substr << endl;
return 0;
}
Tested work
public class LongestUniq {
public static String longestUniqSub (String s ){
int end = 0;
int longestLength = 0;
int runningLongest = 0;
int [] charTable = new int [256];
for (int i = 0 ; i< s.length () ; i++){
char c = s.charAt(i);
if (charTable [c] == 0){
runningLongest ++;
} else {
charTable = new int [256];
runningLongest = 0;
}
charTable [c] = 1;
end = longestLength > runningLongest ? end : i;
longestLength = longestLength > runningLongest ? longestLength : runningLongest;
}
return s.substring(end +1 -longestLength, end+1);
}
public static void main (String args []){
System.out.println(longestUniqSub ("abcdefabjiasdflkty"));
}
}
#import<stdio.h>
#import<stdlib.h>
#import<string.h>
void maxCommonSequence(char* string, size_t len)
{
int charCount[26] = {0};
char sequenceSoFar[len];
char *currentMaxSequence;
int currentMaxLen = 0;
memset(sequenceSoFar, 0, sizeof(char)*len);
int sequenceCharacterIndex = 0;
int i = -1;
do
{
i = i + 1;
if((string[i] != '\0') && (charCount[string[i]-'a'] == 0))
{
sequenceSoFar[sequenceCharacterIndex++] = string[i];
charCount[string[i]-'a'] +=1;
}
else
{
memset(charCount, 0, (sizeof(int)*26));
sequenceSoFar[sequenceCharacterIndex++] = '\0';
if(strlen(sequenceSoFar) > currentMaxLen)
{
currentMaxLen = strlen(sequenceSoFar);
if(currentMaxSequence == NULL)
{
currentMaxSequence = (char*)malloc(sizeof(char)*(strlen(sequenceSoFar)+1));
}
currentMaxSequence = (char*)memcpy(currentMaxSequence, sequenceSoFar, sizeof(char)*(strlen(sequenceSoFar)+1));
}
sequenceCharacterIndex = 0;
}
}while(string[i] != '\0');
printf("Longest Unique Sequence: %s with lenght: %d\n", currentMaxSequence, currentMaxLen);
}
int main()
{
char str[] = "asderdfstringydeflepqzwvmkubcj";
maxCommonSequence(str,(sizeof(str)/sizeof(char)));
}
#include<iostream>
using namespace std;
#include<stdlib.h>
#include<conio.h>
#include<string.h>
#include<math.h>
void uniquechar(char *str)
{
if(!str)
return ;
else
{
int *ta=new int[256];
for(int i=0;i<256;i++)
ta[i]=0;
//for(int i=0;str[i];i++)
//tstr[str[i]]++;
int max=-1;
int j,k;
int si=0,li=0,count,f=0;
for(int i=0;i<str[i]!='\0';i++)
{
k=f;
j=i;
count=i-f;
while(str[i]!='\0'&&ta[str[i]]==0)
{
ta[str[i]]=1;
count++;
j++;
i++;
}
if(str[i]=='\0'||str[i]!='\0')
{
if(max<=count)
{
max=count;
si=k;li=j-1;
}
if(str[i]=='\0')
break;
}
if(str[i]=='\0')
break;
for(int m=si;m<=li;m++)
{
if(str[m]!=str[i])
ta[str[m]]=0;
else
{f=m+1;
break;
}
}
}
cout<<"\nThe from index "<<si<<" to "<<li<<" of length "<<li-si+1<<" is the max unique character ::\n";
for(int i=si;i<=li;i++)
{
cout<<str[i];
}
}
}
int main()
{
char *str=new char[100];
cin>>str;
uniquechar(str);
getch();
return 0;
}
order O(n) complexity..............
#include<iostream>
using namespace std;
#include<stdlib.h>
#include<conio.h>
#include<string.h>
#include<math.h>
void uniquechar(char *str)
{
if(!str)
return ;
else
{
int *ta=new int[256];
for(int i=0;i<256;i++)
ta[i]=0;
//for(int i=0;str[i];i++)
//tstr[str[i]]++;
int max=-1;
int j,k;
int si=0,li=0,count,f=0;
for(int i=0;i<str[i]!='\0';i++)
{
k=f;
j=i;
count=i-f;
while(str[i]!='\0'&&ta[str[i]]==0)
{
ta[str[i]]=1;
count++;
j++;
i++;
}
if(str[i]=='\0'||str[i]!='\0')
{
if(max<=count)
{
max=count;
si=k;li=j-1;
}
if(str[i]=='\0')
break;
}
if(str[i]=='\0')
break;
for(int m=si;m<=li;m++)
{
if(str[m]!=str[i])
ta[str[m]]=0;
else
{f=m+1;
break;
}
}
}
cout<<"\nThe from index "<<si<<" to "<<li<<" of length "<<li-si+1<<" is the max unique character ::\n";
for(int i=si;i<=li;i++)
{
cout<<str[i];
}
}
}
int main()
{
char *str=new char[100];
cin>>str;
uniquechar(str);
getch();
return 0;
}
order O(n) complexity..............
public static void main(String [] args)
{
int[] a ={2,1,1,2,3,4,2};
String x="abcdabcedfbb";
char y[]= x.toCharArray();
//System.out.println(GetMax(a,0,a.length-1));
int n = 92;
//System.out.println( fib(n));
lengthOfLongestSubstring(y,0,y.length,0);
}
public static void lengthOfLongestSubstring(char[] y,int start,int end,int max) {
boolean[] flag = new boolean[256];
int newmax=0;
//System.out.println("Start :"+start+"end :"+end);
if(start==end-1)
{
System.out.println(max);
}
for(int i=start;i<end;i++)
{
if(!flag[y[i]])
{
flag[y[i]]=true;
newmax++;
}
else
{
if(newmax>max)
{
lengthOfLongestSubstring( y,start+1,end,newmax);
break;
}
else
{
lengthOfLongestSubstring( y,start+1,end,max);
break;
}
}
}
}
void printLongestSubstringOfUniqueChars(const char *s)
{
int i, j, map[256] = {0};
int n = strlen(s), overall_best = 0, cur_best = 0, overall_offset=0, cur_offset = 0;
for (i=0; i<n; i++) {
if (map[s[i]] == 0) {
map[s[i]] = 1;
cur_best++;
if (overall_best < cur_best) {
overall_best = cur_best;
overall_offset = cur_offset;
}
} else {
for (j=cur_offset; j<i; j++) {
map[s[j]] = 0; cur_best--;
if (s[j] == s[i]) {
j++; break;
}
}
cur_offset = j; map[s[i]] = 1; cur_best++;
printf("cur_offset became %d\n", cur_offset);
}
}
printf("ans: ");
for (i=overall_offset; i<overall_offset+overall_best; i++) {
printf("%c", s[i]);
}
printf("\n");
}
- intercosmos May 26, 2012