Facebook Interview Question
Software EngineersCountry: United States
Interview Type: Phone Interview
class Node(object):
def __init__(self):
self.children = {}
class Trie(object):
def __init__(self):
self.sentinel = Node()
def add_word(self, word):
curr_node = self.sentinel
for c in word:
if c in curr_node.children:
curr_node = curr_node.children[c]
else:
new_node = Node()
curr_node.children[c] = new_node
curr_node = new_node
def _search(self, tokens, i, curr_node):
# Base case: No tokens, default True
if i == len(tokens):
return True
# Wildcard
if tokens[i][-1] == '*':
# Wildcard matches with nothing
if self._search(tokens, i + 1, curr_node):
return True
# '.*' encountered
if tokens[i][0] == '.':
# Return False if we need to match with something
# but there's nothing left in the string.
#
# Otherwise, recurse with the same token
# and the next character in the string.
return (len(curr_node.children) != 0
and any(self._search(tokens, i, child)
for child in curr_node.children.values()))
# 'w*' encountered, where w is any non-. character
else:
# Return False if we need to match with something
# but there's nothing left in the string.
#
# Otherwise, recurse with the same token
# and the next character in the string.
return (tokens[i][0] in curr_node.children
and self._search(tokens, i, curr_node.children[tokens[i][0]]))
# Single character match
else:
if tokens[i] == '.':
# Return False if we need to match with something
# but there's nothing left in the string.
#
# Otherwise, recurse with the next token
# and the next character in the string.
return (len(curr_node.children) != 0
and any(self._search(tokens, i, child)
for child in curr_node.children.values()))
else:
# Return False if we need to match with something
# but there's nothing left in the string.
#
# Otherwise, recurse with the next token
# and the next character in the string.
return (tokens[i] in curr_node.children
and self._search(tokens, i + 1, curr_node.children[tokens[i]]))
def search(self, regex):
# Assuming valid regex, tokenize
tokens = []
n = len(regex)
i = 0
while i < n:
if i == n - 1 or regex[i + 1] != '*':
tokens.append(regex[i])
i += 1
else:
tokens.append(regex[i : i + 2])
i += 2
return self._search(tokens, 0, self.sentinel)
t = Trie()
t.add_word('mad')
t.add_word('bag')
t.add_word('fad')
t.search('.*d')
solution of search can be improved to O(pattern.length * nodes) with DP
I made it a bit more generic to cover the case with the string containing '.' symbol too. Here '.' means any character (exactly one) and '*' means any number of any characters (0 or more)
Here it is in java:
public class TrieForRegexSearch {
Node root;
int nodeIndex;
public TrieForRegexSearch() {
root = new Node(0);
nodeIndex = 1;
}
public void add(String word) {
Node current = root;
for (int i = 0; i < word.length(); i++) {
int c = word.charAt(i) - 'a';
if (current.children[c] == null) {
current.children[c] = new Node(nodeIndex++);
}
current = current.children[c];
}
current.isLeaf = true;
current.word = word;
}
public boolean containsWord(String pattern) {
Boolean[][] cache = new Boolean[pattern.length()][nodeIndex];
return containsWord(pattern, 0, root, cache);
}
private boolean containsWord(String pattern, int i, Node node, Boolean[][] cache) {
if (node == null) return false;
if (i == pattern.length() && node.isLeaf) return true;
if (i == pattern.length()) return false;
if (cache[i][node.index] != null) return cache[i][node.index];
boolean result = false;
int c = pattern.charAt(i);
if (node.isLeaf) {
result = c == '*' && containsWord(pattern, i + 1, node, cache);
} else if (c == '*') {
if (containsWord(pattern, i + 1, node, cache)) result = true;
else {
for (int j = 0; j < 26; j++)
if (containsWord(pattern, i + 1, node.children[j], cache)) {
result = true;
break;
}
}
} else if (c == '.') {
for (int j = 0; j < 26; j++)
if (containsWord(pattern, i + 1, node.children[j], cache)) {
result = true;
break;
}
} else {
result = containsWord(pattern, i + 1, node.children[c - 'a'], cache);
}
cache[i][node.index] = result;
return result;
}
public static class Node {
boolean isLeaf;
int index;
String word;
Node[] children;
public Node(int index) {
this.index = index;
children = new Node[26];
}
}
}
Posting simple trie solution:
+ Replace '*' with all possible valid characters in trie.
#include<iostream>
#include<vector>
#include<cstring>
using namespace std;
const int TOTAL_LENGTH = 26;
class trieNode
{
public:
bool isKey;
vector<trieNode*> children;
trieNode():isKey(false), children( vector<trieNode*>( TOTAL_LENGTH, nullptr))
{}
};
class wordDict
{
private:
trieNode* root;
bool findWordHelper( trieNode* curr, const string& word )
{
if( word.size() == 0 ) return false;
for( size_t i=0; i<word.size(); ++i)
{
// either a regualer character
if( curr && word[i] != '*')
{
curr = curr->children[ word[i]-'a'];
}
else if( curr && word[i] == '*')
{
// Go over all the characters
for( int j=0; j< TOTAL_LENGTH; ++j)
{
if( curr->children[j] != NULL &&
findWordHelper( curr->children[j], word.substr(i)) )
{
return true;
}
}
}
else
{
break;
}
}
return curr && curr->isKey;
}
public:
wordDict(): root( new trieNode() )
{}
void addWord( const string& word )
{
if( word.size() == 0) return;
trieNode* curr = root;
for( size_t i=0; i< word.size(); ++i )
{
if( curr->children[ word[i]-'a'] == NULL)
{
curr->children[ word[i]-'a'] = new trieNode();
}
curr = curr->children[ word[i]-'a'];
}
//after loop set the flag to indicate valid word
curr->isKey = true;
}
// Word could contain a '*' charactrer to map to any charcter
bool findWord( const string& word )
{
return findWordHelper( root, word );
}
};
int main()
{
wordDict dict;
dict.addWord("hello");
dict.addWord("one");
dict.addWord("two");
cout << dict.findWord("one") << endl;
cout << dict.findWord("two") << endl;
cout << dict.findWord("****e") << endl;
return 0;
}
JavaScript implementation. Add method is iterative, O(L) time and search is recursive, exponential time.
- Anonymous November 10, 2017