Interview Question
Country: United States
well, I believe this is a mutation of longest common substring problem.
so given string a, use dp to find the common longest substring of a and itself. but we need to ignore those max values where i==j in dp[i][j]. time complexity still O(length^2)
or surely you can use trie
C# Solution
Complexity: O(n2)
/* Algorithm:
* Compare each character with each succeeding characters.
* If there is a match, compare rest of the characters till they differ using an inner loop.
* If the length of the matched substring is greater than current max length, update max and other related values.
*
* Test Cases:
* abc
* abcab
* aaa
* abcdabcdab
* 123abcabc21
*/
public static void findLongestSubstring(string str){
if (str == null) throw new ArgumentException("String cannot be null");
int max = 0; //Length of longest substring.
int max_start = 0; //starting index of longest substring
int max_end = 0; //ending index of longest substring
int tempLen = 0;
int m, n;
for (int i = 0; i < str.Length - 1; i++){
for (int j = i+1; j < str.Length; j++){
if (str[i] == str[j]){ //if letters match, compare characters further.
for(m = i, n = j; m < str.Length && n < str.Length && str[m] == str[n]; m++, n++)
tempLen++; //increment as you keep counting length.
if (tempLen > max){ //if length of the substring is greater than current max len
max = tempLen; //update max
max_start = i; //update start and end indexes of this string.
max_end = i+max-1;
}
tempLen = 0; //for a fresh start in next iteration
}
}
}
Console.WriteLine("Max Length: {0}", max);
Console.WriteLine("Max Start: {0}", max_start);
Console.WriteLine("Max End: {0}", max_end);
}
Every substring is a prefix of a suffix. So build a suffix tree. Maintain a count variable at every node of the tree to track the number of time the node was parsed when inserting a suffix. Remember to put zero for the first level as we do not want to consider single letters as substring. Once such an suffix tree is created, just do a dfs to find a node with highest count and bingo, you have your substring.
The code for it in javascript is :
var SuffixTree = function (inputString) {
this.str = inputString;
this.root = null;
}
SuffixTree.prototype.buildTree = function () {
this.root = { count : 0 };
for (var i = this.str.length - 1; i >= 0; i--) {
this._insertSuffix(this.root, this.str.substr(i));
}
}
SuffixTree.prototype.getRepeatedLongestSubstring = function () {
var maxSubstr = '', maxCount = 0;
var dfs = function (node, substr) {
for (chr in node) {
if (chr !== "count") {
var cNode = node[ chr ];
if (cNode.count > maxCount) {
maxSubstr = substr + chr;
maxCount = cNode.count;
}
dfs(cNode, substr + chr);
}
}
};
dfs(this.root, '');
return {'substring' : maxSubstr, 'occurrance' : maxCount};
}
SuffixTree.prototype._insertSuffix = function (node, suffix) {
var level = 0;
for (var i = 0; i < suffix.length; i++) {
var chr = suffix[ i ];
if (typeof node[ chr ] === 'undefined' ) {
node[ chr ] = { count : level };
} else {
node[ chr ].count+=level;
}
level |= 1;
node = node[ chr ];
}
}
var banana = new SuffixTree('bananan');
banana.buildTree();
banana.getRepeatedLongestSubstring();
Here's a plain vanilla C implementation using Trie based suffix tree.
#include <stdio.h>
#include <stdlib.h>
typedef struct node {
char key;
int value;
struct node *next, *children;
} node;
node *makeNode(char key, int value) {
node *n = (node *) malloc(sizeof(node));
n->key = key;
n->value = value;
n->next = n->children = NULL;
return n;
}
int insert (node *trie, char *str, int value) {
int ret = 1;
node *curr;
if (trie == NULL)
return 0;
curr = trie;
if (curr->children) {
curr = curr->children;
while (curr->key != *str) {
if (curr->next)
curr = curr->next;
else {
curr->next = makeNode(*str, value);
curr = curr->next;
ret = 0;
}
}
}
else {
curr->children = makeNode(*str, value);
curr = curr->children;
ret = 0;
}
if (*str == '\0')
return 0;
else
return ret + insert(curr, str+1, value);
}
int main () {
int i, n, max=0, index;
char str[] = "sdkljasdasdhjfhsfhsfdsjdjshjdshjdshhdsjdhhjhfhshhjshfjh";
node *trie;
trie = makeNode('\0', 0);
for (i=0; i < sizeof(str)/sizeof(char); i++) {
n = insert(trie, str+i, 0);
if (n > max) {
max = n;
index = i;
}
}
for (i=0; i<max; i++)
printf ("%c", str[index+i]);
printf ("\n");
}
Isn't this a form of subset problem?
- Anonymous February 07, 2013