Facebook Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: In-Person
If I understood your approach well, we have to keep the indices in tree nodes, right? So in this case the size of the suffix tree would be O(N*K*K)?
and to avoid duplicate report, we need to use binary tree or hash or set. right?
I've implemented your approach, if you have any optimization on it, please let me know. (My code is in next comment)
Also I have no idea how to implement the construction part in O(NK), mine is O(NK^2). (For this specific problem, we can not use the edge compressing idea to reduce the building time)
Here's my implementation with suffix tree.
N= # of numbers(strings)
K= size of numbers (which in this problem is 10)
Time complexity for building the tree is O(N*K*K), Does anyone have any idea how to reduce it?
Also, to avoid repetitive answers, I used set to store the index of each node.any better idea ?
#include <bits/stdc++.h>
using namespace std;
#define null NULL
struct Node
{
set<int> indices;
Node* child[10];
}*root;
Node* add(Node* root,string s,int cur,int index)
{
if (root==null){
root = new Node();
}
root->indices.insert(index);
if (cur != s.size())
root->child[s[cur]-'0'] = add(root->child[s[cur]-'0'],s,cur+1,index);
return root;
}
void createSuffixTree(vector<string> numbers)
{
int N=numbers.size();
for (int i=0;i<N;i++){
for (int j=0;j<numbers[i].size();j++)
//I know that it will be better if I avoid using
//substr here (or at least preprocess every substrings)
//but please try to optimize other parts of the code
root = add(root,numbers[i].substr(j,numbers[i].size()),0,i);
}
}
set<int> count(Node* root,string s,int cur)
{
if (root==null) return set<int>();
if (cur == (int)s.size())
return root->indices;
else
return count(root->child[s[cur]-'0'],s,cur+1);
}
int main()
{
int N;
cin >> N;
vector<string> numbers(N);
for (int i=0;i<N;i++)
cin >> numbers[i];
createSuffixTree(numbers);
string t;
while (cin >> t){
set<int> ans=count(root,t,0);
for (auto x:ans)
cout << x << " " ;
cout << endl;
}
return 0;
}
This is a suffix trie, as suffix tree will do the edge compression which is missing here.
There is an easier way: as all we have are numbers, you can see it as a string database that was already hashed for you. All you gotta do is search it as you'd do in a hash-based query (sort of):
long long pot10(long long x)
{
long long ret = 1;
while(x > 0)
{
ret *= 10;
x /= 10;
}
return ret;
}
long long database[NUM_ENTRIES];
void search(long long queryString)
{
long long p10 = pot10(queryString);
long long aux;
cout << "Querying for " << queryString << " in database " << endl;
for(int i = 0; i < NUM_ENTRIES; ++i)
{
aux = database[i];
for(int j = 0; j < 10; ++j)
{
if(aux % p10 == queryString)
{
cout << "Match id: " << i << " at index " << j << endl;
}
aux /= 10;
}
}
cout << "End query" << endl;
}
Here is the solution in C++.
Idea is to produce the map of partial strings in advance. It takes O(MN^2), where M is number of number strings and N is length of number strings.
Once it is done, searching is O(logn) when map is used.
#include<set>
#include<string>
#include<map>
#include<cstdlib>
#include<iostream>
using namespace std;
class NumberSearch {
public:
NumberSearch(string* input, int length)
{
for (int i = 0; i < length; i++)
{
add_partial(input[i]);
}
}
void add(string newnum)
{
add_partial(newnum);
}
set<string> search(string num)
{
set<string> result;
map<int, set<string> >::iterator found;
if ((found = hash.find(atoi(num.c_str())))!= hash.end())
return found->second;
return result;
}
private:
map<int, set<string> > hash;
void add_partial(string num)
{
for (int tlen = 1; tlen <= num.length(); tlen++)
{
string token ="";
for (int i = 0; i < num.length(); i++)
{
if(num[i] == ' ')
continue;
token.push_back(num[i]);
if (token.length() == tlen)
{
int key = atoi(token.c_str());
if (hash.find(key)== hash.end())
{
set<string> l;
hash[key] = l;
}
if (hash[key].find(num)== hash[key].end())
hash[key].insert(num);
token.erase(0, 1);
}
}
}
}
};
This actually is solved using a regular Trie data structure with R = 10, with alphabet (0, 1, ...9).
public class TrieST<Value> where Value : class
{
//Radix size
private const int R = 256;
private Node root;
class Node
{
public Value Val;
public Node[] Next = new Node[R];
}
public void Put(string key, Value val)
{
root = Put(root, key, val, 0);
}
private Node Put(Node x, string key, Value val, int d)
{
if (x == null) x = new Node();
if (key.Length == d) { x.Val = val; return x; }
char ch = key[d];
x.Next[ch] = Put(x.Next[ch], key, val, d + 1);
return x;
}
public IEnumerable<string> KeysThatMatch(string pat)
{
Queue<string> q = new Queue<string>();
Collect(root, "", pat, q);
return q;
}
private void Collect(Node x, string pre, string pat, Queue<string> q)
{
if (x == null) return;
int d = pre.Length;
if(d == pat.Length)
{
if (x.Val != null) q.Enqueue(pre);
return;
}
char ch = pat[d];
for (char c = (char)0; c < R; c++)
{
if (ch == '.' || ch == c)
Collect(x.Next[c], pre + c, pat, q);
}
}
}
this is a very odd question.
Although it's algorithmically logical to implement it via suffix tree, none of them beat a simple search:
for number in numbers:
if number.find( subnum ) != -1:
print number;
it's always faster.
On a very large data set my program runs out of memory whilst building the tree due to all that memory overhead of new-ing Node objects.
I guess trees should eventually win if the volume grows, but as far as I can see simple iterating is far more practical
this is a very odd question.
Although it's algorithmically logical to implement it via suffix tree, none of them beat a simple search:
for number in numbers:
if number.find( subnum ) != -1:
print number;
it's always faster.
On a very large data set my program runs out of memory whilst building the tree due to all that memory overhead of new-ing Node objects.
I guess trees should eventually win if the volume grows, but as far as I can see simple iterating is far more practical
private static boolean contains(long input, String search) {
int size = search.length();
long s = Long.parseLong(search);
long diff;
int shift = 0;
boolean match;
while (s <= input) {
diff = input - s;
match = true;
for (int i = shift; i < shift + size; i++) {
if (diff % Math.pow(10, i) != diff % Math.pow(10, i + 1)) {
match = false;
break;
}
}
if (match) {
return true;
}
s = s * 10;
shift++;
}
return false;
}
import java.io.*;
import java.util.*;
class Solution {
public static void main(String[] args) throws Exception {
ArrayList<Integer[]> arr = new ArrayList<Integer[]>();
Integer[] a = {1,2,3,4,5,6,7,8,9,0};
Integer[] b = {4,1,2,4,1,2,3,1,2,3};
Integer[] c = {3,1,2,3,1,2,3,3,2,2};
arr.add(a);
arr.add(b);
arr.add(c);
Integer[] f = {4,1}; //{1,2,3};
ArrayList<Integer[]> answer = find(arr, f);
print(answer);
}
// assume the given numbers are all of length 10
private static ArrayList<Integer[]> find(ArrayList<Integer[]> given, Integer[] num) {
if(given == null || given.isEmpty() || num == null )
return null;
ArrayList<Integer[]> answer = new ArrayList<Integer[]>();
for(Integer[] iA : given) {
for(int i=0; i<10; i++) {
int x = i;
int y = 0;
while(y<num.length && iA[x] == num[y]) {
x++;
y++;
}
if(y==num.length) {
answer.add(iA);
i = i+num.length;
break;
}
}
}
return answer;
}
private static void print(ArrayList<Integer[]> answer) {
if(answer == null || answer.isEmpty())
return;
for(Integer[] iA: answer) {
System.out.println(Arrays.asList(iA));
}
}
}
import java.io.*;
import java.util.*;
class Solution {
public static void main(String[] args) throws Exception {
ArrayList<Integer[]> arr = new ArrayList<Integer[]>();
Integer[] a = {1,2,3,4,5,6,7,8,9,0};
Integer[] b = {4,1,2,4,1,2,3,1,2,3};
Integer[] c = {3,1,2,3,1,2,3,3,2,2};
arr.add(a);
arr.add(b);
arr.add(c);
Integer[] f = {4,1}; //{1,2,3};
ArrayList<Integer[]> answer = find(arr, f);
print(answer);
}
// assume the given numbers are all of length 10
private static ArrayList<Integer[]> find(ArrayList<Integer[]> given, Integer[] num) {
if(given == null || given.isEmpty() || num == null )
return null;
ArrayList<Integer[]> answer = new ArrayList<Integer[]>();
for(Integer[] iA : given) {
for(int i=0; i<10; i++) {
int x = i;
int y = 0;
while(y<num.length && iA[x] == num[y]) {
x++;
y++;
}
if(y==num.length) {
answer.add(iA);
i = i+num.length;
break;
}
}
}
return answer;
}
private static void print(ArrayList<Integer[]> answer) {
if(answer == null || answer.isEmpty())
return;
for(Integer[] iA: answer) {
System.out.println(Arrays.asList(iA));
}
}
}
public class IntegerSearch {
public static void main(String[] args) {
long[] a = {1354255942L, 2482224421L, 8689451784L, 2255478996L, 5418255689L, 8754962548L};
for (int i = 0; i < a.length; i++) {
if (contains(a[i], "22244")) {
System.out.println(a[i]);
}
}
}
private static boolean contains(long input, String search) {
int size = search.length();
long s = Long.parseLong(search);
long diff;
int shift = 0;
boolean match;
while (s <= input) {
diff = input - s;
match = true;
for (int i = shift; i < shift + size; i++) {
if (diff % Math.pow(10, i) != diff % Math.pow(10, i + 1)) {
match = false;
break;
}
}
if (match) {
return true;
}
s = s * 10;
shift++;
}
return false;
}
}
There are two clues in the question.
1. its a 10 digit number
2. they are numbers
With these we can design a list of 10 bitsets where
- 'i'th bit of 0th bitset will be 1 if the 'i'th row contains zero
- 'i'th bit of 1st bitset will be 1 if the 'i'th row contains one
and so on.
when you get a number to search get the corresponding bitset for each digit and do an AND of collected sets. Now wherever you have one, that rows are the results.
Space: 10*n bits where n is the number of rows
Time for building: O(1) for finding the right bitset for each digit and O(1) for marking the row in the set. hence 10*n (~n)
Time for searching: O(1) for finding the right bitset for each digit and iterate over the bitset to find the final rows (i.e O(n))
This can also be done like this.
Let the no in row = k
and total row = n
1. Sort all numbers in all row ( time: nkLog(k) space: O(n) as we are changing the file we need extra space. )
2. Sort all row: ( time: nlog(n) space: contant if the file is too large we may have to use merge and it may be n)
3. Binary search for all number divisible by 1230000000 and return 1 ( Time: nlog(n) space: constant)
4. Repeat 3 for 123000000 ( for any no containing 0) ( time: nlog(n) space constant)
So total time will be: nklog(k) + nlog(n) + nlog(n)
asuming k is very small time will be aprox NLogN
Sapace will be N
This can also be done like this.
Let the no in row = k
and total row = n
1. Sort all numbers in all row ( time: nkLog(k) space: O(n) as we are changing the file we need extra space. )
2. Sort all row: ( time: nlog(n) space: contant if the file is too large we may have to use merge and it may be n)
3. Binary search for all number divisible by 1230000000 and return 1 ( Time: nlog(n) space: constant)
4. Repeat 3 for 123000000 ( for any no containing 0) ( time: nlog(n) space constant)
So total time will be: nklog(k) + nlog(n) + nlog(n)
asuming k is very small time will be aprox NLogN
Sapace will be N
I am beginner in DS. This is a very simple code i wrote in java using LinkedList. I assume the numbers in the text to be separated by commas and i am returning the numbers in string type.
Can somebody please tell me if this is an appropriate method to solve this problem. Also, I am not able to figure out the time complexity of my solution.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.LinkedList;
import java.util.Scanner;
public class FindIfNumberPresentFB {
public static void main(String[] args) throws FileNotFoundException {
// TODO Auto-generated method stub
LinkedList<String> link=new LinkedList<String>();
Scanner sc=new Scanner(new File("E:/Java/Workspace/text.txt"));
String in="";
while(sc.hasNext()){
in=in+sc.next();
}
System.out.println(in);
String ll="";
for(int i=0;i<in.length();i++){
if(in.charAt(i)==','){
link.add(ll);
ll="";
}
else{
ll+=in.charAt(i);
}
}
//System.out.println("Linkedlist is "+link);
String n="123";
int test=Integer.parseInt(n);
char f=n.charAt(0);
//System.out.println("f is "+f);
int size=n.length();
//System.out.println("size is "+size);
while(!link.isEmpty()){
String app="";
String z=link.removeFirst();
//System.out.println("z is "+z);
for(int i=0;i<10;i++){
//System.out.println("chars are "+z.charAt(i));
//System.out.println("f is "+f);
if(z.charAt(i)==f){
for(int j=i;j<i+size;j++){
// System.out.println("j is "+j);
app+=z.charAt(j);
}
//System.out.println("app is "+app);
int sample=Integer.parseInt(app);
//System.out.println("sample is "+sample);
//System.out.println("test is "+test);
if(sample==test){
//System.out.println("In if");
//System.out.println("z is "+z);
//System.out.println(z.getClass().getName());
//System.out.println("Z in int is "+Integer.parseInt(z));
//System.out.println(z.getClass().getName());
System.out.println("Number containing is "+z);
}
}
}
}
}
}
boolean countPatterns(String s, String p){
int count = 0;
for (int i = 0; i < s.length() - (p.length() - 1); i++) {
if (s.charAt(i) == p.charAt(0)) {
if (s.substring(i, p.length() + i).equals(p)) {
return true;
}
}
}
return false;
}
void countInText(String[] s, String p) {
for(int i = 0; i < s.length; i++) {
if (countPatterns(s[i], p)) {
System.out.println(s[i]);
}
}
}
Assume
- SK July 15, 2015k = # of digits per number
n = # of numbers in file
Suffix Tree for the numbers. Assume O(k) suffix tree construction for each number. Then, we would perform this n time for a suffix tree construction of O(nk). Now, we can simply look up the number in the suffix tree in O(k) time.