Amazon Interview Question
SDE-2sCountry: India
Interview Type: In-Person
@Iuri Sitinschi Isn't the semantics of MRU a little different from what is required here?
It will hold the most recently added strings at the top, so the most frequent can get masked by new less frequent strings. For example, let's say we see string "aaa" 100 times, but then we get 1000 new strings, the top 100 of MRU cache are all strings that appeared only 1 time, masking the string that was seen 100 times.
Maybe you intended to update the cache based on frequency of the word and not when that appears, but then you need to track frequencies. Moreover, what data structures do you intend to use for the cache?
All in all, you solution needs a little more elaborate explanation, if you do not mind.
possible c# implementation.
using 2 hash tables.
Time complexity O(c), where c - number of most repeated words.
Actually, O(1).
using System;
using System.Collections.Generic;
using System.Linq;
namespace MostRepeatedWords {
public static class WordsAggregator {
private const int MaxNum = 20;
private static readonly Dictionary<string, int> MostRepetedWords = new Dictionary<string, int>();
private static readonly Dictionary<string, int> AllWords = new Dictionary<string, int>();
public static void AddWord( string word ) {
int cnt;
var isWordPresent = AllWords.TryGetValue( word, out cnt );
cnt++;
if ( isWordPresent ) { AllWords[ word ] = cnt; }
else { AllWords.Add( word, cnt ); }
if ( MostRepetedWords.ContainsKey( word ) ) {
MostRepetedWords[ word ]++;
return;
}
int min = 0;
try {
min = MostRepetedWords.Values.Min();
if ( cnt <= min ) {
return;
}
} catch (InvalidOperationException e) when( e.Message.Equals( "Sequence contains no elements" ) ){}
if ( MostRepetedWords.Count == MaxNum ) {
MostRepetedWords.Remove( MostRepetedWords.FirstOrDefault( x => x.Value == min ).Key );
}
MostRepetedWords.Add( word, cnt );
}
public static List<string> GetMostRepeatedWords() {
return MostRepetedWords.Keys.ToList();
}
}
}
Using two TreeMap
package com.amazon.arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
public class FindMostRepeatedWords {
Map<String, Integer> wordsMap = new TreeMap<String, Integer>();
public void getRepeated(String[] arr, int count) {
Integer val;
for (String s : arr) {
if (wordsMap.containsKey(s)) {
val = wordsMap.get(s);
val++;
wordsMap.put(s, val);
} else {
wordsMap.put(s, 1);
}
}
List<String> st = new LinkedList<String>();
Map<Integer, List<String>> countMap = new TreeMap<Integer, List<String>>()
.descendingMap();
for (Map.Entry<String, Integer> m : wordsMap.entrySet()) {
if (countMap.containsKey(m.getValue())) {
st = countMap.get(m.getValue());
st.add(m.getKey());
} else {
st = new LinkedList<String>();
st.add(m.getKey());
countMap.put(m.getValue(), st);
}
System.out.println(" ");
}
System.out.println(" Count Map ");
int number = 0;
for (Map.Entry<Integer, List<String>> m : countMap.entrySet()) {
System.out.println(" -->>>>" + m.getKey() + " ");
List<String> word = m.getValue();
Iterator<String> itr = word.iterator();
while (itr.hasNext()) {
System.out.println(" " + itr.next());
number++;
if (number == count)
break;
}
if (number == count)
break;
}
return;
}
public static void main(String args[]) {
String[] arr = { " akkhil", "kumar", "gupta", "testCase", " akkhil",
"kumar", "kumar", "kumar", "kumar", "kumar", "kumar", "kumar",
"kumar", "kumar", "kumar", "gupta", "gupta", "gupta" };
new FindMostRepeatedWords().getRepeated(arr, 4);
}
}
Use a heap (for tracking n most frequent words) and a hashMap (for recording the frequency of every word)
two steps:
1. update hash table:
a. if word never appears, add to hash table
b. if word exists, increase the frequency
2. update the heap (three cases)
case 1: the word in the top list already - redo the heap
case 2: the word is not in the top list and the top list is under filled - add it to the list
case 3: the word is not in the top list and the top list reaches the limit and the new one has a higher frequency than some one in the top list - replace the min and redo the heap
class WordSt {
public:
string word;
int freq;
bool inlist;
WordSt(const string &w, int f, bool in) : word(w), freq(f), inlist(in) {}
};
class WordCount {
void injectWord(const string& word)
{
if (wordMap.find(word) == wordMap.end())
wordMap[word] = new WordSt(word, 1, false);
else
wordMap[word]->freq++;
WordSt* ws = wordMap[word];
if (ws->inlist) { // in the list, update
make_heap(tops.begin(), tops.end(), WordStComp());
} else if (tops.size() < toprank) { // not full
tops.push_back(ws);
make_heap(tops.begin(), tops.end(), WordStComp());
ws->inlist = true;
} else if (ws->freq > tops.front()->freq) { // not equal
pop_heap(tops.begin(), tops.end(), WordStComp());
WordSt* discard = tops.back();
discard->inlist = false;
tops.pop_back();
tops.push_back(ws);
push_heap(tops.begin(), tops.end(), WordStComp());
ws->inlist = true;
}
}
vector<string> getTopWords() const
{
vector<string> words;
for_each(tops.begin(), tops.end(),
[&words](WordSt* ws) { words.push_back(ws->word); });
return words;
}
~WordCount()
{
map<string, WordSt*>::const_iterator it;
for (it = wordMap.begin(); it != wordMap.end(); it++)
delete it->second;
}
private:
int toprank;
// vector<WordSt&> tops; // Error! Reference is not assignable
vector<WordSt*> tops;
map<string, WordSt*> wordMap;
};
Trie containing frequency and Min Heap of max 100 elements
- ankushbindlish November 26, 2015New word update frequency > heap root , Eligible for top 100 candidates