Amazon Interview Question
InternsCountry: United States
Interview Type: Phone Interview
Using Multimap
package careercup;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.commons.collections4.map.MultiValueMap;
public class CC6241017174949888 {
public static HashMap<String, Integer> finalScore(ArrayList<testResult> scores){
MultiValueMap<String, Integer> scoresList = new MultiValueMap<String,Integer>();
for(testResult score:scores){
scoresList.put(score.studentId,score.score);
}
System.out.println(scoresList.toString());
HashMap<String,Integer> finalScores = new HashMap<String,Integer>();
for(Entry result:scoresList.entrySet()){
ArrayList<Integer> mark = (ArrayList<Integer>) result.getValue();
Collections.sort(mark, Collections.reverseOrder());;
int finalSc = 0;
for(int i=0;i<5;i++){
finalSc += mark.get(i);
}
finalScores.put((String) result.getKey(), (finalSc/5));
}
return finalScores;
}
public static void main(String[] args){
ArrayList<testResult> testResults = new ArrayList<testResult>();
testResults.add(new testResult(45, "varun", "1"));
testResults.add(new testResult(40, "vikas", "1"));
testResults.add(new testResult(35, "sachin", "1"));
testResults.add(new testResult(20, "praveen", "1"));
testResults.add(new testResult(15, "nitish", "1"));
testResults.add(new testResult(56, "varun", "1"));
testResults.add(new testResult(68, "varun", "1"));
testResults.add(new testResult(44, "vikas", "1"));
testResults.add(new testResult(36, "sachin", "1"));
testResults.add(new testResult(25, "praveen", "1"));
testResults.add(new testResult(17, "nitish", "1"));
testResults.add(new testResult(43, "varun", "1"));
testResults.add(new testResult(42, "varun", "1"));
testResults.add(new testResult(48, "vikas", "1"));
testResults.add(new testResult(45, "sachin", "1"));
testResults.add(new testResult(70, "praveen", "1"));
testResults.add(new testResult(55, "nitish", "1"));
testResults.add(new testResult(75, "varun", "1"));
testResults.add(new testResult(85, "varun", "1"));
testResults.add(new testResult(30, "vikas", "1"));
testResults.add(new testResult(35, "sachin", "1"));
testResults.add(new testResult(10, "praveen", "1"));
testResults.add(new testResult(65, "nitish", "1"));
testResults.add(new testResult(75, "varun", "1"));
testResults.add(new testResult(45, "varun", "1"));
testResults.add(new testResult(46, "vikas", "1"));
testResults.add(new testResult(32, "sachin", "1"));
testResults.add(new testResult(78, "praveen", "1"));
testResults.add(new testResult(88, "nitish", "1"));
testResults.add(new testResult(90, "varun", "1"));
System.out.println(finalScore(testResults).toString());
}
}
1) Create a map with the ID as the key and a list of five sorted scores in the descent order as the value
2) For each TestResult, if the ID isn't in the map, add the ID and a list with the score into the map. If the ID is in the map and the list length isn't five yet, insert the score into its list so that the descent order is maintained. If the ID is in the map and the score is lower than the last score in the list, do nothing. If the ID is in the map and the score is bigger than the last score in the list, remove the last score and insert the score in a proper position.
3) Iterating over the map and calculate the average of score list if a list is full of five elements.
One possible solution is that we will create a separate min heap for all the students each exactly having 5 nodes. Insert everything until the the heap reaches size 5. Than
if new element is greater than the root replace it with root (delete and insert operation of heap)
else ignore
when all the data has been seen once.
calculate average for each heap.
From what I think we need not to use student name(names can be repeated use id instead) or date for the score
Complexity
Building each heap is O(5)
building n/5 heaps is O(n)
at max n inserts O(nlog5)= O(n)
at max n deletions O(nlog5)= O(n)
total complexity O(n)
WARN** The code was typed directly into notepad so it might not compile or be pretty
//Asume it is implemented somewhere, reads and parses de raw results data
class ResultsParser{
void open(const &string filename);
bool isOpen();//true if its open false otherWise
int size();
int getResultAt(int n);//like [] operator
}
//given struct
struct TestResult {
int score,
string student_id,
string date
};
//workhorse containing all the info
std::map<string,vector<TestResult> results;//FIXME pretify name
//put it into the map
void addResult(TestResult r){
results[r.student_ip].push_back(r);
}
//descendent sort based on score
bool studentComparator (TestResult &a,TestResult &b) {
return (a.score > b.score);
}
//Sort descendently all the scores from this student, uses firs five to calculate average
int calculateScore(const string &studentId){
vector<TestResult> current = results[studentId];
if (current.size() < 5 )
std::trow("Exception calculatin score, invalide number of results");
int totalScore = 0;
int avgScore =0;
std::sort( current.begin(), current.end(), studentComparator);
for(int i=0; i <5; i++){
totalScore += current[i];
}
avgScore = totalScore / 5;
return avgScore;
}
int main(){
//read data from disc, implementation is trivial so it was ommited for clarity
ResultsParser parser;
parser.open("fileWithData.txt");
if (!parser.isOpen) return;
//Group results by student id
for (int i =0; i< parser.size(); i++){
TestResult current = parser.getResultAt(i);
addResult(current);
}
//Print the results for each student
map<int,vector<TestResult>>::iterator it;
it = results.begin();
while (it != results.end(){
string id = it.first;
int score = calculateScore(id);
cout <<"Student " id << " Scored:" <<score;
it++;
}
return 1;
}
NOTES:
The code throws an exception if a student does not have enough scores to calculate its final score
The code assumes a small list of total scores for the student, therefore it inserts everything in the vector and sorts it after the insertions are finished. If a large number of testresults exist for the same student a space efficient approach will be to store only the top 5 elements as mentioned in other posts
The code was not tested or compiled in anyway, if you find any abominations please let me now, since Im practicing in NotePad
It's obviously we shall sort the array. And as the scores are all unsigned integers <= 100 one can use counting sort which leads to a complexity of O(N) time and O(1) memory - if we think that N is very big (<100.000) and way bigger than 100. We only need to store an array of length 101. :)
In this case, we implement a customer function to compare our struct/class (c/java). Once we have implemented this, we also need to implement a 'multimap'. A multimap is a data structure similar to other maps in that it has key,value pairs. It's uniqueness is derived from the property that you can insert multiple values using the same key value. In this instance, our key would be the student name (ID) and all associated values would be placed in the list. We can sort the value upon insertion with a lot of complex coding, or we can choose to sort once all values are loaded and we actually extract the values to find the 5 max. The method you use would depend on your implementation criteria and data size. The last step would be going through each student and getting the top 5 scores (see above for sorting methods) and peel the average out of them returning a new Map containing student names as the key, and the top 5 score average as the value.
This question appears to be testing your understanding of container classes and data structures in general, so I would emphasize my talking points around design decisions. If I were asked this question, I would think that the interviewer is more interested in my ability to design a system of interdependent structures more so that my mastery of an algorithmic design. Therefore make your design, explain your decisions, and proactively offer counter points that you discarded and why. The interviewer is wanting to know why you made your decisions so they can gauge your design maturity.
# -*- coding: utf-8 -*-
__author__ = 'ouyhui'
import heapq
class StudentScores:
def calculate(self, student_scores_file):
file = open(student_scores_file)
student_scores_map = {}
for line in file:
info = line.split(' ')
if info[1] in student_scores_map:
heap = student_scores_map[info[1]]
else:
heap = []
student_scores_map[info[1]] = heap
score = int(info[0])
if len(heap) < 5:
heapq.heappush(heap, score)
elif heap[0] < score:
heapq.heapreplace(heap, score)
file.close()
for student_name, score_heap in student_scores_map.items():
sum = 0
for score in score_heap:
sum += score
average_score = sum / len(score_heap)
print '%s\'s score:%d' % (student_name, average_score)
I like your solution, however, you can make it shorter and more simple using defaultdict.
import heapq
from collections import defaultdict
def bestFiveScores(filename):
results = defaultdict(list)
fil = open(filename,"r")
for line in fil:
score,name,date = line.split()
if len(results[name]) < 5:
heapq.heappush(results[name],int(score))
elif results[name][0] < int(score):
heapq.heapreplace(results[name],int(score))
fil.close()
return results
def printScore(filename):
finalResults = bestFiveScores(filename)
for name in finalResults:
finalScore=sum(finalResults[name]) / len(finalResults[name])
print '%s\'s score: %d' % (name, finalScore)
#include<iostream>
#include<vector>
#include<fstream>
#include<sstream>
#include<string>
#include<algorithm>
#include<map>
using namespace std;
struct marks
{
int score;
string date;
marks():score(0),date(""){}
};
class student
{
map< string, vector<marks> > iv_stu;
public:
bool updateMarks(string file);
int finalScores(string name);
};
int student::finalScores(string name)
{
do
{
map< string, vector<marks> >::iterator it;
vector<int> mrks;
for(it = iv_stu.begin(); it != iv_stu.end(); it++)
{
if(name == it->first)
{
vector<marks> mrksList = it->second;
for(vector<marks>::iterator i = mrksList.begin(); i != mrksList.end(); i++)
{
marks node = *i;
mrks.push_back(node.score);
}
}
}
for(int i = 0; i < mrks.size(); i++)
cout << mrks[i] << " ";
cout << endl;
sort(mrks.begin(),mrks.end());
for(int i = 0; i < mrks.size(); i++)
cout << mrks[i] << " ";
cout << endl;
int i, j = 1;
int sum = 0;
float finalScore = 0;
for(i = (mrks.size() - 1); i >= 0 && j <= 5; i--, j++)
{
sum += mrks[i];
finalScore = (sum/j);
}
if(sum)
{
cout << "Final Score for " << name << " is: " << finalScore << endl;
}
}while(0);
}
bool student::updateMarks(string file)
{
bool status = true;
do
{
ifstream fss;
fss.open(file.c_str());
string line;
while(getline(fss, line))
{
istringstream iss(line);
string name, date;
int scr;
iss >> scr;
iss >> name;
iss >> date;
cout << scr << " " << name << " " << date << endl;
marks node;
node.score = scr;
node.date = date;
iv_stu[name].push_back(node);
}
}while(0);
return status;
}
int main()
{
student st;
bool status = st.updateMarks("score.txt");
if(status)
{
st.finalScores("JACK");
st.finalScores("MIKE");
st.finalScores("JOHN");
st.finalScores("BOBBY");
}
return 0;
}
- varun.me15 January 16, 2015