Google Interview Question
Interns
Country: United States
int const P = 3;  // number of properties stored per object
typedef std::array<int, P> MyObject;  // an object is just its P integer property values

/**
 * Groups the objects by the value of each of their properties.
 *
 * @param lst  Objects to index; the vector is only read, never modified.
 * @return     A vector of P hash maps. maps[p][value] is the list of (copies of)
 *             all objects whose property p equals value, in input order.
 *             A list with more than one entry is a set of objects that match
 *             on property p.
 */
std::vector<std::unordered_map<int, std::list<MyObject>>> find_all_matches(std::vector<MyObject>& lst) {
    std::vector<std::unordered_map<int, std::list<MyObject>>> maps(P);
    for (int p = 0; p < P; p++) {  // for each property of an object
        std::unordered_map<int, std::list<MyObject>>& by_value = maps[p];
        for (const MyObject& obj : lst) {  // for each object
            // operator[] default-constructs an empty list the first time a
            // value is seen, so no separate find()/insert branch is needed.
            by_value[obj[p]].push_back(obj);
        }
    }
    return maps;
}
First, iterate over every attribute and every object, remembering the first object seen with each attribute value. As the undirected graph is built, each later object with the same attribute value gets an edge connecting it to that first object.
Building the graph is O(m*n), where m is the number of attributes and n is the number of objects.
Then iterate over the vertices of the graph using a "mark and sweep" strategy: each traversal that starts from a not-yet-visited vertex uses a new unique id and marks every vertex it reaches with that id. These ids identify the clusters.
Finally convert the graph back into an array of clusters by iterating through the vertices and keeping an array for each found cluster id.
class Vertice:
    """Graph node wrapping one input object.

    The four attributes are declared at class level as ``None`` placeholders
    and receive their per-instance values in ``__init__``.
    """

    # Class-level placeholders; every instance overrides them in __init__.
    obj = edges = mark = visited = None

    def __init__(self, o):
        """Wrap *o* in a fresh vertex: no edges, no mark, not yet visited."""
        self.visited = False
        self.mark = None
        self.edges = []  # a new list per instance, never shared
        self.obj = o
def build_graph(obj_array, attributes):
    """Cluster objects that share at least one attribute value.

    Every object becomes a vertex. For each attribute position, the first
    object seen with a given value is remembered, and every later object with
    the same value is linked to it by an undirected edge. Connected vertices
    are then labelled with a common mark, and objects are grouped by mark.

    Args:
        obj_array: Iterable of objects; each must be indexable by attribute
            position (``obj[index]``) and its values must be hashable.
        attributes: Iterable with one entry per attribute; only the number of
            entries is used, to enumerate the attribute positions.

    Returns:
        A list of clusters (each a list of objects), in order of first
        appearance. Each cluster is also printed on its own line.
    """
    # Imported locally so the function does not rely on file-level imports.
    import uuid
    from collections import defaultdict

    vertices = [Vertice(obj) for obj in obj_array]

    for index, _attr in enumerate(attributes):
        # Maps each value of this attribute to the first vertex that had it.
        first_with_value = {}
        for v in vertices:
            first = first_with_value.setdefault(v.obj[index], v)
            if first is not v:
                # Same value seen before: connect both ways (undirected).
                v.edges.append(first)
                first.edges.append(v)

    for v in vertices:
        # Only an unvisited vertex starts a new cluster and needs a new id.
        if not v.visited:
            dfs(v, uuid.uuid1())

    clusters = defaultdict(list)
    for v in vertices:
        clusters[v.mark].append(v.obj)

    result = list(clusters.values())
    for cluster in result:
        print(cluster)
    return result
def dfs(v, mark):
    """Label every not-yet-visited vertex reachable from *v* with *mark*.

    Args:
        v: Start vertex; must expose ``visited``, ``mark`` and ``edges``
            (an iterable of neighbouring vertices).
        mark: Value stored in ``mark`` on each vertex reached.

    Vertices that are already visited keep their existing mark; if *v* itself
    is already visited, nothing happens.
    """
    # An explicit stack replaces recursion so that long chains of connected
    # vertices cannot exceed the interpreter's recursion limit.
    stack = [v]
    while stack:
        current = stack.pop()
        if current.visited:
            continue
        current.visited = True
        current.mark = mark
        stack.extend(n for n in current.edges if not n.visited)
import math
class car:
    """Plain record holding the four features that describe one car."""

    def __init__(self, model, engine, turbo, car_class):
        """Store the given feature values unchanged on the instance."""
        self.model, self.engine = model, engine
        self.turbo, self.car_class = turbo, car_class
class vector_space:
    """Collects car feature vectors and prints their pairwise distances."""

    def __init__(self):
        """Start with an empty list of feature vectors."""
        self.object_feature = []

    def train(self, cars):
        """Add the feature vectors of *cars*, then print all pairwise distances."""
        self.create_vector_space(cars)
        self.find_similarities()

    def create_vector_space(self, cars):
        """Append one ``[model, engine, turbo, car_class]`` row per car."""
        self.object_feature.extend(
            [item.model, item.engine, item.turbo, item.car_class]
            for item in cars
        )

    def find_similarities(self):
        """Print ``i j distance`` for every pair of stored vectors with i < j."""
        rows = self.object_feature
        for i, left in enumerate(rows):
            for j, right in enumerate(rows[i + 1:], start=i + 1):
                print(i, j, self.euclidean_dist_sim(left, right))

    def euclidean_dist_sim(self, vector1, vector2):
        """Return the Euclidean distance between *vector1* and *vector2*.

        Components are paired with ``zip``, so any extra trailing components
        of the longer vector are ignored.
        """
        return math.sqrt(
            sum(abs(a - b) ** 2 for a, b in zip(vector1, vector2))
        )
# Demo data: four cars, each described by four 0/1 feature values.
car1, car2, car3, car4 = (
    car(1, 1, 1, 1),
    car(1, 0, 1, 1),
    car(0, 1, 0, 0),
    car(1, 0, 0, 0),
)
cars = [car1, car2, car3, car4]

# NOTE: this rebinds the name ``vector_space`` from the class to an instance.
vector_space = vector_space()
vector_space.train(cars)  # prints one "i j distance" line per pair of cars
import math
class car:
    """Plain record holding the four features that describe one car."""

    def __init__(self, model, engine, turbo, car_class):
        """Store the given feature values unchanged on the instance."""
        self.model, self.engine = model, engine
        self.turbo, self.car_class = turbo, car_class
class vector_space:
    """Collects car feature vectors and prints their pairwise distances."""

    def __init__(self):
        """Start with an empty list of feature vectors."""
        self.object_feature = []

    def train(self, cars):
        """Add the feature vectors of *cars*, then print all pairwise distances."""
        self.create_vector_space(cars)
        self.find_similarities()

    def create_vector_space(self, cars):
        """Append one ``[model, engine, turbo, car_class]`` row per car."""
        self.object_feature.extend(
            [item.model, item.engine, item.turbo, item.car_class]
            for item in cars
        )

    def find_similarities(self):
        """Print ``i j distance`` for every pair of stored vectors with i < j."""
        rows = self.object_feature
        for i, left in enumerate(rows):
            for j, right in enumerate(rows[i + 1:], start=i + 1):
                print(i, j, self.euclidean_dist_sim(left, right))

    def euclidean_dist_sim(self, vector1, vector2):
        """Return the Euclidean distance between *vector1* and *vector2*.

        Components are paired with ``zip``, so any extra trailing components
        of the longer vector are ignored.
        """
        return math.sqrt(
            sum(abs(a - b) ** 2 for a, b in zip(vector1, vector2))
        )
# Demo data: four cars, each described by four 0/1 feature values.
car1, car2, car3, car4 = (
    car(1, 1, 1, 1),
    car(1, 0, 1, 1),
    car(0, 1, 0, 0),
    car(1, 0, 0, 0),
)
cars = [car1, car2, car3, car4]

# NOTE: this rebinds the name ``vector_space`` from the class to an instance.
vector_space = vector_space()
vector_space.train(cars)  # prints one "i j distance" line per pair of cars
I guess I would start with an O(n^2*p) version (n: #objects, p: #properties): for each pair of objects, compare all properties and output the pair if at least one property is equal.
- Chris August 23, 2017
Alternatively, one can build indexes over all objects and all properties (e.g. in a hash table). Then it comes down to finding, for each property, the sets of objects that are equal on that property, and taking the union of these sets over all properties. That is worst case O(p*n), but probably much better on average (it depends on the number of objects that have equality).