Google Interview Question
Software Developers
Country: United States
Interview Type: In-Person
Preprocess the dictionary into a trie. On every iteration we have the letter z of the input word and a set S consisting of pairs (x,r), where x is our remaining non-matching character allowance and r is a trie node. Initially we put (1,R) into S, where R is the trie root node. For every pair (x,r) in S and every child node w of r, we put (x,w) back into S if w corresponds to z; otherwise we put (x-1,w), provided that x>0. Once the whole input word has been consumed, we have found a match if any node left in S has an end-of-word child. Complexity: quadratic in the worst case. This is because on every iteration the growth of S is bounded by a constant L (the size of the alphabet): an entry with x==0 can only 'spawn' at most 1 new entry in place of itself, and at every point there is only 1 entry with x==1, which can spawn at most L new entries (with x==0), giving us O(n*n*L) = O(n*n).
# Sample dictionary of known words used by the trie-based lookup below.
D = [
    "apple",
    "apple",
    "banana",
    "orange",
]
def preprocess_dict(words=None):
    """Build a nested-dict trie from a collection of words.

    Every trie node is a ``dict`` mapping a single character to its child
    node.  A node at which a word ends additionally carries the key ``None``
    (mapped to an empty list) as an end-of-word marker.

    :param words: iterable of strings to index.  When omitted, the
        module-level dictionary ``D`` is used.
    :return: the root node of the trie.
    """
    if words is None:
        words = D
    trie = {}
    for word in words:
        node = trie
        for letter in word:
            # Reuse the existing child for this letter or create a new one.
            node = node.setdefault(letter, {})
        # Mark that a complete word ends at this node.
        node[None] = []
    return trie
# Root node of the trie, built once from the dictionary D.
R = preprocess_dict()
def word_exists(word, trie=None):
    """Return True if a dictionary word matches ``word`` with at most one
    substituted letter.

    The trie is walked letter by letter while tracking every node that can
    still lead to a match, together with the number of mismatches that may
    still be spent on that path (initially one).

    :param word: the word to look up.
    :param trie: root of a nested-dict trie in which the key ``None`` marks
        the end of a word.  When omitted, the module-level trie ``R`` is
        used.
    :return: True when some word of the same length differs from ``word`` in
        at most one position, False otherwise.
    """
    if trie is None:
        trie = R
    # Each state is (remaining mismatch allowance, trie node reached so far).
    states = [(1, trie)]
    for letter in word:
        next_states = []
        for allowance, node in states:
            for key, child in node.items():
                if key is None:
                    # End-of-word marker, not a child node to descend into.
                    continue
                if key == letter:
                    next_states.append((allowance, child))
                elif allowance > 0:
                    # Spend the single allowed substitution on this child.
                    # This is tried even when another child matches the
                    # letter, because the real typo may be right here.
                    next_states.append((allowance - 1, child))
        if not next_states:
            # No path survives, so nothing can match any more.
            return False
        states = next_states
    return any(None in node for _, node in states)
# Demo: look up a word that differs from 'apple' in its last letter.
print (word_exists('applx'))
Another option is to populate the trie with all combinations of the allowed string, with an additional wildcard option (*) in the trie
eg: 'apple' will be added as 6 words - 'apple', '*pple', 'a*ple', 'ap*le', 'app*e', 'appl*'
when searching for an input word, search for exact match first
if not found try with the wildcard option at one position at a time
At each position, there are only two possible options to check the trie match - the exact char or the wildcard char, so search complexity is proportional to 2n operations (where n is the length of the search string)
An alternative O(n*n) solution would be to preprocess the dictionary into a hash map thus: for every dictionary word w of length q, add to the hash map the q words that can be created by removing the i-th letter from w, for each position i. Example: for the word 'dog' we add 'og', 'dg', 'do' into the hashmap. Then for the input word we do a similar thing: remove the i-th letter and see if the resulting word is in the hashmap. It's quadratic because it takes O(n) to hash the word and we need to do it n times.
# Sample dictionary of known words.
D = ['apple', 'apple', 'banana', 'orange']


def preprocess_dict():
    """Index every word of ``D`` by each of its "one letter removed" forms.

    Each entry is a ``(position, remainder)`` pair, where ``remainder`` is
    the word with the letter at ``position`` removed.  The position is part
    of the key so that only words differing at that same position match;
    without it a transposition such as 'paple' would be reported as one
    letter off 'apple'.

    :return: a set of ``(int, str)`` pairs.
    """
    return {
        (i, word[:i] + word[i + 1:])
        for word in D
        for i in range(len(word))
    }


# Lookup index built once from D.
H = preprocess_dict()


def word_exists(word):
    """Return True if ``word`` differs from a word of ``D`` in at most one
    letter position (an exact match also counts).

    :param word: the word to look up.
    :return: True when removing the letter at some position of ``word``
        gives the same remainder as removing the letter at that same
        position of a dictionary word; False otherwise, including for the
        empty string.
    """
    return any(
        (i, word[:i] + word[i + 1:]) in H
        for i in range(len(word))
    )


# Demo: the comparison is case-sensitive, so 'P' counts as the one mismatch.
print(word_exists('aPple'))
The algorithm is as follows.
1. Construct a prefix trie with the given words.
2. Starting from the empty prefix of the word and ending with the complete word minus its last letter, iteratively do the following. Assume that the letter following the prefix is the typo and should be skipped, and check whether any subtrie hanging off that prefix contains the remaining suffix of the word. For example, if the input word is "posqfix", then check, in the subtrie starting at "", for all the children of "", whether "osqfix" is a valid suffix. Next, assume that the valid prefix is "p", and check, in the subtrie starting at "p", for all the children of "p", whether "sqfix" is a valid suffix. And so on, until the valid prefix is "posqfi" and the suffix is "".
# %pip install english-words
from english_words import english_words_set
from typing import Dict, Set, List, Optional
class TrieNode:
    """
    Represents a node in a Prefix Trie.

    Words are stored exactly as passed to ``add_word``; the lookup methods
    (``contains``, ``is_prefix``, ``get_prefix``, ``get_longest_prefix``)
    lower-case the query before walking the trie.
    """

    def __init__(self, letter: str, ends_word: bool):
        self.letter: str = letter  # The letter corresponding to the node
        # If True, then the sequence of letters leading up to this node
        # represents a valid word.
        self.ends_word: bool = ends_word
        self.children: Dict[str, "TrieNode"] = {}

    def add_word(self, word: str) -> "TrieNode":
        """
        Adds a word to the Trie starting at this TrieNode.

        Returns the child node for the first letter of the word.  For the
        empty word, this node itself is marked as ending a word and returned.
        """
        if not word:
            self.ends_word = True
            return self
        # Walk iteratively so long words neither hit the recursion limit nor
        # get re-sliced at every level.
        node = self
        for letter in word:
            child = node.children.get(letter)
            if child is None:
                child = TrieNode(letter, False)
                node.children[letter] = child
            node = child
        node.ends_word = True
        return self.children[word[0]]

    def add_words(self, words: Set[str]) -> None:
        """
        Adds every word of the given collection to the Trie.
        """
        for word in words:
            self.add_word(word)

    def _trace(self, word: str):
        """
        Follows the lower-cased word from this node as far as possible.

        Returns a pair (path, complete): path is the list of nodes visited,
        starting with this node, and complete is True only when every letter
        of the word was matched.
        """
        path = [self]
        for letter in word.lower():
            following = path[-1].children.get(letter)
            if following is None:
                return path, False
            path.append(following)
        return path, True

    def contains(self, word: str) -> bool:
        """
        Returns True if the Trie starting at this TrieNode contains the word.
        """
        path, complete = self._trace(word)
        return complete and path[-1].ends_word

    def is_prefix(self, word: str) -> bool:
        """
        Returns True if the word is a prefix of the words in the Trie starting
        at this TrieNode. The empty word is always a prefix.
        """
        _, complete = self._trace(word)
        return complete

    def get_prefix(self, word: str) -> List["TrieNode"]:
        """
        If the word is a valid prefix in the Trie starting at this TrieNode, then
        this function returns the list of TrieNodes corresponding to this word
        (this node first). Else, it returns a list with just self.
        """
        path, complete = self._trace(word)
        return path if complete else [self]

    # Unused. Not needed.
    def get_longest_prefix(self, word: str) -> List["TrieNode"]:
        """
        Returns the list of TrieNodes that constitute the longest prefix of the
        word in the Trie starting at this TrieNode (this node first).
        """
        path, _ = self._trace(word)
        return path

    def __repr__(self):
        return f"[{self.letter}][{self.ends_word}][{set(self.children.keys())}]"
# Module-level trie: an empty root node filled with every word of the
# imported english_words_set.
trie = TrieNode('', False)
trie.add_words(english_words_set)
def get_one_letter_off_word(trie: TrieNode, word: str) -> Optional[str]:
    """
    Finds a word in the trie that differs from ``word`` in exactly one letter.

    Positions are tried from left to right. At each position the typed letter
    is assumed to be the typo: every other child of the node reached by the
    prefix is tried as a replacement, and the first one whose subtrie
    contains the rest of the word wins.

    :param trie: root node of the prefix trie to search.
    :param word: the possibly misspelled word.
    :return: the corrected word (the prefix and suffix are taken from
        ``word`` as typed), or None when ``word`` is already in the trie or
        no word one letter off was found.
    """
    if trie.contains(word):
        return None
    # Node reached by the prefix word[:index]. It is advanced one letter per
    # iteration instead of re-walking the prefix from the root every time.
    node = trie
    for index, typed_letter in enumerate(word):
        suffix = word[index + 1:]
        for letter, child_node in node.children.items():
            if letter == typed_letter:
                # Don't consider the subtrie that matches this letter;
                # we are looking for a word that is one letter off here.
                continue
            if child_node.contains(suffix):
                return f"{word[:index]}{letter}{suffix}"
        # Accept the typed letter and move on to the next position. Lookups
        # are lower-cased, as TrieNode.is_prefix does.
        node = node.children.get(typed_letter.lower())
        if node is None:
            # The prefix is not in the trie, so no longer prefix is either.
            break
    return None
# Sanity checks for get_one_letter_off_word. Several of them depend on which
# words the english-words corpus loaded above happens to contain.
assert get_one_letter_off_word(trie, "applx") == "apple"
# Make sure two words sharing the prefix "pred" are present before probing
# a typo in the middle of a word.
trie.add_word('predate')
trie.add_word('predict')
assert trie.contains('predate')
assert trie.contains('predict')
assert get_one_letter_off_word(trie, 'predite') == "predate"
# A typo in the very first letter.
trie.add_word('ok')
assert get_one_letter_off_word(trie, 'qk') == 'ok'
# A string for which no correction is expected.
assert get_one_letter_off_word(trie, 'aawdefcz') is None
import math
from typing import List
from collections import namedtuple
from pprint import pprint
class Point2D(namedtuple('Point2D', ['x', 'y'])):
    """Immutable 2-D point with ``x`` and ``y`` coordinates.

    Equality compares the two coordinates, and the hash is derived from the
    same pair so that equal points hash equally.
    """

    def __eq__(self, other):
        """Return True when ``other`` has equal ``x`` and ``y`` coordinates.

        Returns ``NotImplemented`` for objects without ``x``/``y`` attributes
        (for example ``None``), so Python falls back to the other operand's
        comparison instead of raising AttributeError.
        """
        try:
            return self.x == other.x and self.y == other.y
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        """Return a hash consistent with ``__eq__``."""
        return hash((self.x, self.y))
def euclidean_distance_2d(point1: Point2D,
                          point2: Point2D) -> float:
    """Return the straight-line distance between two 2-D points."""
    delta_x = point1.x - point2.x
    delta_y = point1.y - point2.y
    squared_distance = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_distance)
class DSU2DPoint:
    """Disjoint-set (union-find) structure over a collection of points.

    Two points end up in the same set when ``union_sets`` is called on them
    and their Euclidean distance does not exceed the threshold.
    """

    def __init__(self,
                 points: "List[Point2D]",
                 k: float = 2):
        """
        :param points: the points to track; each starts in its own set.
        :param k: maximum distance at which ``union_sets`` merges two points.
        """
        self.points = points
        self.parents = {}
        self.make_set()
        self.threshold = k

    def make_set(self):
        """Make every point its own parent, i.e. a singleton set."""
        for point in self.points:
            self.parents[point] = point

    def find_set(self,
                 point: "Point2D"):
        """Return the representative (root) of the set containing ``point``.

        Every node visited on the way is re-pointed directly at the root.
        The walk is iterative so long parent chains cannot exhaust the
        recursion limit.
        """
        root = point
        while self.parents[root] != root:
            root = self.parents[root]
        # Second pass: compress the path that was just walked.
        while point != root:
            next_point = self.parents[point]
            self.parents[point] = root
            point = next_point
        return root

    def union_sets(self,
                   point1: "Point2D",
                   point2: "Point2D"):
        """Merge the sets of the two points if they lie within the threshold
        distance of each other; otherwise do nothing."""
        distance = euclidean_distance_2d(point1, point2)
        if distance <= self.threshold:
            a = self.find_set(point1)
            b = self.find_set(point2)
            if a != b:
                self.parents[b] = a

    def groups(self):
        """Return the number of disjoint sets.

        Roots are counted rather than immediate parents: a point whose path
        has not been compressed yet still points at an intermediate node,
        which would otherwise be counted as an extra group.
        """
        return len({self.find_set(point) for point in self.parents})
def group_2D_points(points: List[Point2D],
                    k=2):
    """Group the points with a DSU2DPoint and return the number of groups.

    Every ordered pair of points whose current parents differ is handed to
    ``union_sets``. The final parent table is pretty-printed before the
    group count is returned.

    :param points: the points to group.
    :param k: distance threshold passed on to DSU2DPoint.
    :return: the value of ``DSU2DPoint.groups()`` after all pairs were tried.
    """
    union_find = DSU2DPoint(points, k)
    for first in points:
        for second in points:
            if union_find.parents[first] == union_find.parents[second]:
                # Same immediate parent, so nothing to merge for this pair.
                continue
            union_find.union_sets(first, second)
    pprint(union_find.parents)
    return union_find.groups()
if __name__ == "__main__":
    # Demo: pair up the coordinate lists into points and count the groups
    # formed with a distance threshold of 3.
    xx = [1, 2, 4, 7, 0, 9]
    yy = [1, 0, 1, 4, 1, 10]
    points = [Point2D(x=x, y=y) for x, y in zip(xx, yy)]
    count = group_2D_points(points, k=3)
    print("Total groups %s" % count)
// Returns the smallest of the three given integers.
static int min(int x, int y, int z) {
    int smallerOfFirstTwo = Math.min(x, y);
    return Math.min(smallerOfFirstTwo, z);
}
/**
 * Computes the edit (Levenshtein) distance between the first m characters of
 * str1 and the first n characters of str2.
 *
 * The table is filled bottom-up, so every sub-problem is solved once
 * (O(m * n) time and space) instead of being recomputed by plain recursion.
 *
 * @param str1 first string
 * @param str2 second string
 * @param m    number of leading characters of str1 to consider
 * @param n    number of leading characters of str2 to consider
 * @return minimum number of insertions, removals and replacements that turn
 *         the considered prefix of str1 into the considered prefix of str2
 */
static int editDist(String str1, String str2, int m, int n) {
    // dist[i][j] = edit distance between str1[0..i) and str2[0..j).
    int[][] dist = new int[m + 1][n + 1];
    for (int i = 0; i <= m; i++) {
        for (int j = 0; j <= n; j++) {
            if (i == 0) {
                // First prefix is empty: insert all j characters.
                dist[i][j] = j;
            } else if (j == 0) {
                // Second prefix is empty: remove all i characters.
                dist[i][j] = i;
            } else if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
                // Last characters agree: nothing to pay for them.
                dist[i][j] = dist[i - 1][j - 1];
            } else {
                int insert = dist[i][j - 1];
                int remove = dist[i - 1][j];
                int replace = dist[i - 1][j - 1];
                dist[i][j] = 1 + Math.min(insert, Math.min(remove, replace));
            }
        }
    }
    return dist[m][n];
}
// Prints the dictionary word with the smallest edit distance to the query;
// on a tie the earliest such word is kept.
public static void main(String args[]) {
    final String[] dictionary = {"apple", "pineapple", "banana", "orange"};
    final String query = "apple";
    String bestWord = "";
    int bestScore = 1000000;
    for (int i = 0; i < dictionary.length; i++) {
        String candidate = dictionary[i];
        int score = editDist(query, candidate, query.length(), candidate.length());
        if (score < bestScore) {
            bestScore = score;
            bestWord = candidate;
        }
    }
    System.out.println(bestWord);
}
/**
 * Bottom-up (dynamic programming) Levenshtein distance, plus a small demo
 * that reports whether a query is exactly one edit away from a dictionary
 * word.
 */
class LevenshteinDistanceDP {

    /**
     * Returns the Levenshtein distance between the two strings: the minimum
     * number of single-character replacements, deletions and insertions
     * needed to turn str1 into str2.
     */
    static int compute_Levenshtein_distanceDP(String str1,
                                              String str2)
    {
        // dp[i][j] = distance between the first i chars of str1 and the
        // first j chars of str2.
        int[][] dp = new int[str1.length() + 1][str2.length() + 1];
        for (int i = 0; i <= str1.length(); i++)
        {
            for (int j = 0; j <= str2.length(); j++) {
                if (i == 0) {
                    dp[i][j] = j;
                }
                else if (j == 0) {
                    dp[i][j] = i;
                }
                else {
                    dp[i][j] = minm_edits(dp[i - 1][j - 1]
                            + NumOfReplacement(str1.charAt(i - 1), str2.charAt(j - 1)), // replace
                            dp[i - 1][j] + 1, // delete
                            dp[i][j - 1] + 1); // insert
                }
            }
        }
        return dp[str1.length()][str2.length()];
    }

    /** Returns the replacement cost: 0 when the characters match, else 1. */
    static int NumOfReplacement(char c1, char c2)
    {
        return c1 == c2 ? 0 : 1;
    }

    /**
     * Returns the smallest of the given values, or Integer.MAX_VALUE when
     * none are given.
     */
    static int minm_edits(int... nums)
    {
        int smallest = Integer.MAX_VALUE;
        for (int value : nums) {
            if (value < smallest) {
                smallest = value;
            }
        }
        return smallest;
    }

    /**
     * Prints true when some dictionary word is exactly one edit away from
     * the query and false otherwise. Exactly one line is printed.
     */
    public static void main(String args[])
    {
        String[] dictionary = {"apple", "pineapple", "banana", "orange"};
        String query = "applx";
        boolean found = false;
        for (String word : dictionary) {
            if (compute_Levenshtein_distanceDP(word, query) == 1) {
                found = true;
                break;
            }
        }
        System.out.println(found);
    }
}
// Returns the dictionary word that best matches `w`, or an empty string when
// no word qualifies.
//
// A word qualifies when its length differs from that of `w` by at most one
// and the longest substring it shares with `w` is at most one character
// shorter than `w`. Among qualifying words the one with the longest shared
// substring wins; on a tie the later dictionary entry wins.
//
// NOTE(review): a typo in the middle of the word splits the shared substring
// ("axple" vs "apple" share only "ple"), so such inputs are rejected by this
// heuristic -- confirm whether that is intended.
std::string guess_word(const std::string& w, const std::vector<std::string>& dict) {
    std::string best_word;
    std::size_t best_length = 0;
    bool found = false;
    for (const std::string& word : dict) {
        if (word.length() > w.length() + 1 || w.length() > word.length() + 1)
            continue;
        // Longest common substring, keeping only two rows of the table.
        // previous[j + 1] is the length of the common suffix of
        // word[0..i-1] and w[0..j]. Counters are std::size_t so long words
        // cannot overflow them.
        std::vector<std::size_t> previous(w.size() + 1, 0);
        std::vector<std::size_t> current(w.size() + 1, 0);
        std::size_t longest = 0;
        for (std::size_t i = 0; i < word.size(); ++i) {
            for (std::size_t j = 0; j < w.size(); ++j) {
                if (w[j] == word[i]) {
                    current[j + 1] = previous[j] + 1;
                    if (current[j + 1] > longest)
                        longest = current[j + 1];
                } else {
                    current[j + 1] = 0;
                }
            }
            previous.swap(current);
        }
        // longest never exceeds w.size(), so the subtraction cannot wrap.
        if (w.size() - longest < 2 && (!found || longest >= best_length)) {
            found = true;
            best_length = longest;
            best_word = word;
        }
    }
    return best_word;
}
// Returns the dictionary word that best matches `w`, or an empty string when
// no word qualifies.
//
// A word qualifies when its length differs from that of `w` by at most one
// and the longest substring it shares with `w` is at most one character
// shorter than `w`. Among qualifying words the one with the longest shared
// substring wins; on a tie the later dictionary entry wins.
//
// NOTE(review): a typo in the middle of the word splits the shared substring
// ("axple" vs "apple" share only "ple"), so such inputs are rejected by this
// heuristic -- confirm whether that is intended.
std::string guess_word(const std::string& w, const std::vector<std::string>& dict) {
    std::string best_word;
    std::size_t best_length = 0;
    bool found = false;
    for (const std::string& word : dict) {
        if (word.length() > w.length() + 1 || w.length() > word.length() + 1)
            continue;
        // Longest common substring, keeping only two rows of the table.
        // previous[j + 1] is the length of the common suffix of
        // word[0..i-1] and w[0..j]. Counters are std::size_t so long words
        // cannot overflow them.
        std::vector<std::size_t> previous(w.size() + 1, 0);
        std::vector<std::size_t> current(w.size() + 1, 0);
        std::size_t longest = 0;
        for (std::size_t i = 0; i < word.size(); ++i) {
            for (std::size_t j = 0; j < w.size(); ++j) {
                if (w[j] == word[i]) {
                    current[j + 1] = previous[j] + 1;
                    if (current[j + 1] > longest)
                        longest = current[j + 1];
                } else {
                    current[j + 1] = 0;
                }
            }
            previous.swap(current);
        }
        // longest never exceeds w.size(), so the subtraction cannot wrap.
        if (w.size() - longest < 2 && (!found || longest >= best_length)) {
            found = true;
            best_length = longest;
            best_word = word;
        }
    }
    return best_word;
}
- nicolarusso February 27, 2020