Amazon Interview Question
Software Engineers
Country: India
Interview Type: Phone Interview
package com.company;
import java.util.*;
/**
* Created by ideven on 31/01/15.
*/
/**
 * Counts how often each value occurs in {@link #num} and prints the
 * {@link #K} most frequent values to standard output, one
 * {@code value=count} pair per line, most frequent first.
 */
public class TopK {
    /** Input values whose frequencies are counted. */
    int[] num = {1, 2, 6, 5, 4, 6, 7, 8, 8, 4, 2, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1};

    /** Maximum number of entries printed by {@link #topK()}. */
    public final int K = 3;

    /** Value to occurrence count; rebuilt from scratch on every {@link #topK()} call. */
    Map<Integer, Integer> map = new HashMap<Integer, Integer>();

    /**
     * Recounts the values in {@link #num} and prints up to {@link #K} entries
     * with the highest counts. Fewer than {@code K} lines are printed when the
     * input has fewer than {@code K} distinct values. Entries with equal counts
     * keep the order in which the map iterates them, because the sort is stable.
     */
    public void topK() {
        // Start from an empty map so repeated calls do not accumulate counts.
        map.clear();
        for (int value : num) {
            Integer seen = map.get(value);
            map.put(value, seen == null ? 1 : seen + 1);
        }

        // ArrayList gives O(1) indexed access for the print loop below.
        List<Map.Entry<Integer, Integer>> list =
                new ArrayList<Map.Entry<Integer, Integer>>(map.entrySet());

        // Order by count, highest first.
        Collections.sort(list, new Comparator<Map.Entry<Integer, Integer>>() {
            @Override
            public int compare(Map.Entry<Integer, Integer> o1, Map.Entry<Integer, Integer> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        // Never read past the end when there are fewer than K distinct values.
        int limit = Math.min(K, list.size());
        for (int k = 0; k < limit; k++) {
            System.out.println(list.get(k));
        }
    }

    /** Runs the demo on the built-in sample data. */
    public static void main(String[] args) {
        TopK t = new TopK();
        t.topK();
    }
}
Implementation using the C++11 unordered_map. It runs in O(N + M log M) time, where N is the length of the stream and M is the number of distinct values.
#include <algorithm>
#include <iostream>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace std;
// Pairs a number with how many times it occurred.
struct numocc
{
    int num; // the value itself
    int occ; // its occurrence count
};
// Ordering predicate: true when a occurred strictly more often than b,
// so sorting with it puts the highest counts first.
bool comp(numocc a, numocc b)
{
    const bool aIsMoreFrequent = b.occ < a.occ;
    return aIsMoreFrequent;
}
// Prints the k most frequent values of v to standard output, one per line in
// the form "value/count", most frequent first. Values with equal counts are
// printed in ascending value order so the output is deterministic.
// Prints nothing when k <= 0 or v is empty; prints every distinct value when
// k exceeds the number of distinct values.
// Cost: O(N) expected for counting plus O(M log k) for ranking, where N is
// v.size() and M is the number of distinct values.
void TopKFrequent(const std::vector<int>& v, int k) {
    if (k <= 0 || v.empty()) {
        return;
    }

    // Count occurrences of every value.
    std::unordered_map<int, int> counts;
    for (int value : v) {
        ++counts[value];
    }

    // (value, count) pairs copied out of the map so they can be ranked.
    typedef std::pair<int, int> Entry;
    typedef std::vector<Entry>::size_type Size;
    std::vector<Entry> ranked(counts.begin(), counts.end());

    // k is known to be positive here, so the cast cannot wrap.
    const Size limit = std::min(static_cast<Size>(k), ranked.size());

    // Only the first `limit` positions need to be in order.
    std::partial_sort(ranked.begin(), ranked.begin() + limit, ranked.end(),
        [](const Entry& a, const Entry& b) {
            return a.second > b.second ||
                   (a.second == b.second && a.first < b.first);
        });

    for (Size j = 0; j < limit; ++j) {
        std::cout << ranked[j].first << "/" << ranked[j].second << std::endl;
    }
}
// Demo driver: reports the three most frequent values of a fixed sample.
int main()
{
    const std::vector<int> samples{1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4,
                                   4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6};
    const int wanted = 3;
    TopKFrequent(samples, wanted);
    return 0;
}
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* @author Sumit Kumar
*
* Find the top k frequent items in a stream of numbers .
*
*/
public class StreamCountAnalyzer {
    /**
     * Demo entry point: counts a fixed sample stream and prints the two most
     * frequent values as {@code value=count} pairs.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        int[] streamData = new int[] { 10, 6, 8, 9, 7, 56, 7, 9, 8, 9, 8, 9 };
        TrackStream trackStream = new TrackStream();
        trackStream.updateFrequency(streamData);
        System.out.println("Max 2 values :"
                + trackStream.getFrequentlyOccuringInteger(2));
    }

    /**
     * Accumulates occurrence counts for the integers fed to it and reports
     * the most frequent ones on request.
     */
    private static class TrackStream {
        /** Count stored the first time a value is seen. */
        private static final int FIRST_COUNT = 1;

        /** Value to number of times it has been seen so far. */
        private final Map<Integer, Integer> streamDataCounter = new HashMap<Integer, Integer>();

        /**
         * Adds every element of {@code streamData} to the running counts.
         * Counts accumulate across calls.
         *
         * @param streamData values to count
         */
        public void updateFrequency(int[] streamData) {
            for (int data : streamData) {
                Integer frequency = streamDataCounter.get(data);
                streamDataCounter.put(data,
                        frequency == null ? FIRST_COUNT : frequency + 1);
            }
        }

        /**
         * Returns the entries with the highest counts, ordered by ascending
         * count (the most frequent value is last).
         *
         * @param maxNumber maximum number of entries to return; when it
         *        exceeds the number of distinct values, all entries are
         *        returned
         * @return a new list of at most {@code maxNumber} entries
         * @throws IllegalArgumentException if {@code maxNumber} is negative
         */
        public List<Entry<Integer, Integer>> getFrequentlyOccuringInteger(
                int maxNumber) {
            if (maxNumber < 0) {
                throw new IllegalArgumentException(
                        "maxNumber must not be negative: " + maxNumber);
            }
            List<Map.Entry<Integer, Integer>> valueList =
                    new ArrayList<Map.Entry<Integer, Integer>>(
                            streamDataCounter.entrySet());
            Collections.sort(valueList, new EntryComparator());

            // Clamp so asking for more entries than exist returns them all
            // instead of indexing before the start of the list.
            int size = valueList.size();
            int from = size - Math.min(maxNumber, size);

            // Copy the tail so the result is not a view over the sorted list.
            return new ArrayList<Map.Entry<Integer, Integer>>(
                    valueList.subList(from, size));
        }

        /** Orders entries by ascending count. */
        private static class EntryComparator implements
                Comparator<Map.Entry<Integer, Integer>> {
            @Override
            public int compare(Entry<Integer, Integer> object1,
                    Entry<Integer, Integer> object2) {
                return object1.getValue().compareTo(object2.getValue());
            }
        }
    }
}
Put the numbers in a hash table that maps each number to its frequency. Afterwards, extract the entries into a vector and sort it by frequency.
O(n log n) time, O(n) space.
Alternative:
Build a max heap of <number, frequency> pairs and keep a handle to each heap node so that frequency updates are easy. Then remove k items from the heap.
O(nlogn) time, O(n) space
By using a hash map to count frequencies and a min heap of size k, you can improve the solution to O(n log k). At the end of the process, the heap holds the answer.
If we know the range of the numbers, we can use an array of that length.
Every time we encounter a number, we increment the count stored at that number's index.
After all numbers have been added, sort the counts. In this case we do not have to do the bookkeeping needed to maintain the heap condition,
and we only sort once.
However, if we want to find the top K numbers at any given point during the process, a heap is the best option.
- ankit.vader January 30, 2015