Google Interview Question
Backend DevelopersCountry: United States
Interview Type: In-Person
Check rules with maximum match:
in here, we use queue for BFS of graph, you can improve performance with check similarity and ignore graph with low similarity to continue.
from collections import deque
from bisect import insort
class RuleMap:
def __init__(self, n):
self._n = n
self._rules = {}
def add_rule(self, rule, value):
rules = self._rules
for i,x in enumerate(rule):
if x not in rules:
rules[x] = {}
if i==self._n-1:
rules[x] = value
else:
rules = rules[x]
def get_value(self, rule):
ans = []
q = deque()
#rule/similarity/index
q.append([self._rules, 0, 0])
while q:
curr_rule, similarity, idx = q.popleft()
if rule[idx] in curr_rule:
if idx==self._n-1:
insort(ans, (similarity+1, curr_rule[rule[idx]]))
else:
q.append([curr_rule[rule[idx]], similarity+1, idx+1])
if '*' in curr_rule:
if idx==self._n-1:
insort(ans, (similarity, curr_rule['*']))
else:
q.append([curr_rule['*'], similarity, idx+1])
return ans and ans[-1][1]
I use queue for BFS, you can improve the code by ignore traversing the graph with much less similarity of the last path which has been found
from collections import deque
from bisect import insort
class RuleMap:
def __init__(self, n):
self._n = n
self._rules = {}
def add_rule(self, rule, value):
rules = self._rules
for i,x in enumerate(rule):
if x not in rules:
rules[x] = {}
if i==self._n-1:
rules[x] = value
else:
rules = rules[x]
def get_value(self, rule):
ans = []
q = deque()
#rule/similarity/index
q.append([self._rules, 0, 0])
while q:
curr_rule, similarity, idx = q.popleft()
if rule[idx] in curr_rule:
if idx==self._n-1:
insort(ans, (similarity+1, curr_rule[rule[idx]]))
else:
q.append([curr_rule[rule[idx]], similarity+1, idx+1])
if '*' in curr_rule:
if idx==self._n-1:
insort(ans, (similarity, curr_rule['*']))
else:
q.append([curr_rule['*'], similarity, idx+1])
return ans and ans[-1][1]
if __name__=='__main__':
data = [
[3,
[
[['A1', 'B1', 'C1'], 30],
[['*', 'B1', 'C2'], 20],
[['A2', 'B1', '*'], 25]
],
[
[['A1', 'B1', 'C1'], 30],
[['A1', 'B1', 'C2'], 20],
[['A2', 'B1', 'C3'], 25],
]
]
]
print('Check rules with maximum match:')
for d in data:
rm = RuleMap(d[0])
for r in d[1]:
rm.add_rule(*r)
for r in d[2]:
print('for rule: ', r[0], 'output is', rm.get_value(r[0]), 'expected', r[1])
from typing import List, Dict, Tuple, Set
def input_string_to_2d_array(input: str) -> List[List[str]]:
list_of_rows = input.strip().split('\n')
list_of_list_of_terms = map(lambda x: x.strip().split(' '), list_of_rows)
return list(list_of_list_of_terms)
First, we convert the input to a directed graph.
The graph has a single source "Source" that is has outgoing edges to all the vertices corresponding to the first column
of the rule table. Every other vertex has an outgoing edge to the rule in the next column of the rule table on the same
row.
Next, we start minimizing the graph as follows.
For each vertex (starting with 'Source'),
1. merge all children that have the same name into a single vertex
2. merge the same name nodes' children into a single set
3. update the number of rules that reach the vertex as the sum of number of rules that reach each of the vertices with the same name.
4. For children that are a '\*' vertex, make a copy of suffix of '\*' vertex and merge it with each of the non-'\*'
node's children.
Recursively perform this minimization on each of the vertex's children.
After minimizing the entire graph, if some sink node that mode than one value, then the ruleset is ambiguous.
import copy
class Vertex:
def __init__(self, name: str):
self.name: str = name
self.prefix_count: int = 1 # Number of prefix paths leading to this vertex
self.children: List["Vertex"] = []
self.is_sink = True
def __repr__(self) -> str:
neighor_names = list(map(lambda x: x.name, self.children))
return f"<{self.name}, count: {self.prefix_count}) -> {neighor_names}>"
def __str__(self) -> str:
return self.__repr__()
def make_copy(self) -> "Vertex":
"""
Makes a deep copy of this vertex, and recursively makes a deep copy of all
its children.
"""
clone = copy.deepcopy(self)
clone.children = {child.make_copy() for child in self.children}
return clone
class RuleTable:
def __init__(self, rules: List[List[str]]):
self.rule_list = rules
self.source = Vertex("source")
self.is_ambiguous_rules = False
def make_graph(self) -> None:
"""
Creates the graph one row at a time.
"""
for row_number in range(len(self.rule_list)):
self._make_graph_for_row(row_number)
def _make_graph_for_row(self, row: int):
initial_vertex = Vertex(self.rule_list[row][0])
current_vertex = initial_vertex
col_count = len(self.rule_list[row])
for col in range(1, col_count):
next_vertex = Vertex(self.rule_list[row][col])
current_vertex.children.append(next_vertex)
current_vertex.is_sink = False
current_vertex = next_vertex
self.source.children.append(initial_vertex)
self.source.is_sink = False
def is_ambiguous(self) -> bool:
return self.minimize_graph(self.source)
def minimize_graph(self, parent_vertex: Vertex) -> bool:
"""
Minimizes the children of a given vertex by merging children with the same
name, and copying the wild children's suffixes to each of the (merged)
children. It also updates the number of rules that reach this vertex.
Then it recursively merges each of the children (updating the number of rules
that reach those vertices).
If we are merging the sink nodes, then the function determines if more than
one rule reaches the vertex before the sink vertex, and if it has more
than on child (so multiple sinks), then it states that the ruleset is
ambiguous, and returns that value.
"""
children = parent_vertex.children
minimized_children: Dict[str, Vertex] = {}
wild_children: List[Vertex] = []
for child in children:
if child.name == "*":
wild_children.append(child)
continue
if child.name not in minimized_children:
minimized_children[child.name] = child
continue
minimized_children[child.name].children.extend(child.children)
minimized_children[child.name].prefix_count+=child.prefix_count
parent_vertex.children = list(minimized_children.values())
# Now to deal with the wildcard children.
for wild_child in wild_children:
# for every non-wild child, extend its children by each of the wild
# child's suffix.
for child in parent_vertex.children:
wild_grandchildren = \
[grandchild.make_copy() for grandchild in wild_child.children]
child.children.extend(wild_grandchildren)
# The number of prefix rules that can reach this child must now be
# incremented by the number of prefix rules that can reach the wild
# child.
child.prefix_count+= wild_child.prefix_count
for child in parent_vertex.children:
self.minimize_graph(child)
if (parent_vertex.prefix_count > 1
and len(parent_vertex.children) > 1
and parent_vertex.children[0].is_sink):
self.is_ambiguous_rules = True
return self.is_ambiguous_rules
Tests:
all_wild_ambiguous_rule_string = """
a1 b2 c1 20
a2 b1 c2 40
a1 b1 c2 30
* * * 20
"""
all_wild_ambiguous_rule = RuleTable(
input_string_to_2d_array(all_wild_ambiguous_rule_string))
all_wild_ambiguous_rule.make_graph()
assert all_wild_ambiguous_rule.is_ambiguous()
one_rule_string = """
a1 b2 c1 20
"""
one_rule = RuleTable(input_string_to_2d_array(one_rule_string))
one_rule.make_graph()
assert one_rule.is_ambiguous() is False
one_rule_all_wild_string = """
* * * * 20
"""
one_rule_all_wild = RuleTable(
input_string_to_2d_array(one_rule_all_wild_string))
one_rule_all_wild.make_graph()
assert one_rule_all_wild.is_ambiguous() is False
all_wild_ambiguous_2_rule_string = """
* * * 40
* * * 20
"""
all_wild_ambiguous_2_rule = RuleTable(
input_string_to_2d_array(all_wild_ambiguous_2_rule_string))
all_wild_ambiguous_2_rule.make_graph()
assert all_wild_ambiguous_2_rule.is_ambiguous() is False
ambiguous_rule_string = """
a1 b2 c1 d345 20
a2 b1 c2 d34 40
a1 b2 c1 d345 30
"""
ambiguous_rule = RuleTable(input_string_to_2d_array(ambiguous_rule_string))
ambiguous_rule.make_graph()
assert ambiguous_rule.is_ambiguous()
non_ambiguous_rule_string = """
a1 b2 c1 d34 20
a2 b1 c2 d34 40
a1 b2 c1 d345 30
"""
non_ambiguous_rule = RuleTable(
input_string_to_2d_array(non_ambiguous_rule_string))
non_ambiguous_rule.make_graph()
assert non_ambiguous_rule.is_ambiguous() is False
package main.rules;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
public class RulesExecute {
public static void main(String[] args){
RulesExecute executor = new RulesExecute();
RulesTable table = new RulesTable();
RuleRecord r = new RuleRecord();
r.rule1="A1";r.rule2="B2";r.rule3="C3"; r.result=40;
table.rules.add( r );
r = new RuleRecord();
r.rule1="A1";r.rule2="B1";r.rule3="C1"; r.result=20;
table.rules.add( r );
r = new RuleRecord();
r.rule1="A2";r.rule2="B2";r.rule3="C2"; r.result=10;
table.rules.add( r );
r = new RuleRecord();
r.rule1="A1";r.rule2="B1";r.rule3="C1"; r.result=20;
table.rules.add( r );
r = new RuleRecord();
r.rule1="*";r.rule2="B1";r.rule3="*"; r.result=20;
table.rules.add( r );
r = new RuleRecord();
r.rule1="A5";r.rule2="B2";r.rule3="*"; r.result=10;
table.rules.add( r );
List<RuleRecord> tempList = new ArrayList<>( table.rules );
boolean ambiguous = table.rules.stream().anyMatch( record->{
tempList.remove( record );
return tempList.stream().filter( other->{
if(executor.ruleMatch( other.rule1, record.rule1 ) &&
executor.ruleMatch( other.rule2, record.rule2 ) &&
executor.ruleMatch( other.rule3, record.rule3 ) &&
other.result != record.result){
return true;
}else{
return false;
}
} ).collect(Collectors.toList()).size() > 0;
} );
System.out.print( ambiguous );
}
public boolean ruleMatch(String rule1, String rule2){
if(rule1.equals( "*" ) || rule2.equals( "*" )){
return true;
}else if(rule1.equals( rule2 )){
return true;
}
return false;
}
}
# With `r` = number of rules in `table` and `sr` = number of rules that contain at least one `*`:
# Time complexity = O(r) + O(r * sr)
def isAmbiguous(table):
seen = {}
starred_rules = {}
is_amb = False
for rule in table: # O(r)
val = rule.pop()
curr_rule = tuple(rule)
if '*' in rule:
if curr_rule in starred_rules and starred_rules[curr_rule] != val: # worst case: O(s) - assuming robust hash function with no collisions: O(1)
is_amb = True
break
starred_rules[curr_rule] = val
else:
if curr_rule in seen and seen[curr_rule] != val: # worst case: O(se) - assuming robust hash function with no collisions: O(1)
is_amb = True
break
seen[curr_rule] = val
if not is_amb and bool(starred_rules):
for star_rule in starred_rules: # O(sr)
for rule in seen: # O(r)
if len(set(star_rule) - set(rule)) < 2:
return True
return is_amb
I am using trieNode that takes strings instead of char. The upper bound is m*(!n) where n is the length of rule.
class TrieNode{
public HashMap<String, TrieNode> childrenMap;
public String value;
public TrieNode(HashMap<String,TrieNode> map, String value){
this.childrenMap = map;
this.value = value;
}
}
boolean hasAmibiguouty(String [][] rules, TrieNode root){
HashSet<String> [] rulesSet = new HashSet[rules[0].length - 1];
for(int i = 0;i < rules[0].length - 1; i++){
rulesSet[i] = new HashSet<String>();
for(int j = 0; j < rules.length;j++){
if(rules[j][i] != "*" && !rulesSet[i].contains(rules[j][i]))
rulesSet[i].add(rules[j][i]);
}
}
boolean duplicate = false;
for(int i = 0; i < rules.length; i++){
duplicate = addRule(rules[i], rulesSet, root,0);
if(duplicate)return true;
}
return false;
}
boolean addRule(String [] rule, HashSet [] ruleSet, TrieNode root, int index){
if(index == rule.length - 1){
if(root.value != null)return true;
root.value = rule[index];
return false;
}
if(rule[index].equals("*")){
Iterator<String> it = ruleSet[index].iterator();
while(it.hasNext()){
String ruleName = it.next();
root = addRuleToNode(root,ruleName);
boolean duplicate = addRule(rule,ruleSet,root.childrenMap.get(ruleName),index + 1);
if(duplicate)return true;
}
}else{
root = addRuleToNode(root,rule[index]);
return addRule(rule,ruleSet,root.childrenMap.get(rule[index]),index + 1);
}
return false;
}
TrieNode addRuleToNode(TrieNode root, String ruleName){
if(root.childrenMap.containsKey(ruleName)){
return root;
}else{
TrieNode node = new TrieNode(new HashMap<String, TrieNode>(),null);
root.childrenMap.put(ruleName, node);
return root;
}
}
I feel this is problem of linear equations. Once a matrix is constructed by m*n combinations and has row rank equal to number of rows and column rank equal to number of columns, then the equations are stable otherwise ambiguous.
Coming to wild cards, one way to generate rows for every combination of total variables in set and append to Matrix.
Construct a trie based on rules. If while processing the end of the rule we discover that a different value was already inserted for the node we abandon rule processing and print 'ambiguous'.
- adr October 25, 2019