Microsoft Interview Question
Computer ScientistsCountry: India
Interview Type: In-Person
Can u code max heap min heap logic......i tried it but it not give me correct results....
Here is an implementation. I am using static arrays but this can easily be changed to use dynamic arrays. Have used both insertion sort and heap sort to verify if the implementation of heap is correct.
#include<stdio.h>
#include<stdlib.h>
typedef struct heap_t
{
int arr[1000];
int count;
} heap;
float insertion(heap*ins_obj, int val);
int htop(heap*hp);
void heap_ins(heap*heap_obj, int val, int minmax);
int heap_del(heap*heap_obj, int minmax);
float minmaxheap(heap*hmax, heap*hmin, int val);
void printarr(int arr[], int l);
main()
{
heap hmax, hmin, ins;
int val, i;
float base, hp;
hmax.count = 1;
hmin.count = 1;
ins.count = 1;
for(i=0;i<50;i++)
{
val = rand()%1000;
base = insertion(&ins, val);
hp = minmaxheap(&hmax, &hmin, val);
printf("Insertion Sort: %.1f \t Heap: %.1f\n", base, hp);
}
return 0;
}
float insertion(heap*ins_obj, int val)
{
int i, j;
for(i=1;i<(*ins_obj).count;i++)
if(val>=(*ins_obj).arr[i])
break;
for(j=(*ins_obj).count;j>=i;j--)
(*ins_obj).arr[j+1] = (*ins_obj).arr[j];
(*ins_obj).arr[i] = val;
(*ins_obj).count++;
if((*ins_obj).count%2==0)
return (*ins_obj).arr[(*ins_obj).count/2];
else
return ((*ins_obj).arr[(*ins_obj).count/2]+(*ins_obj).arr[(*ins_obj).count/2+1])/2.0;
}
void heap_ins(heap*heap_obj, int val, int minmax)
{
int parent, child, temp;
(*heap_obj).arr[(*heap_obj).count]= val;
child = (*heap_obj).count++;
parent = child/2;
while((((*heap_obj).arr[parent]>(*heap_obj).arr[child])&&!minmax)||(((*heap_obj).arr[parent]<(*heap_obj).arr[child])&&minmax)&&(child!=1))
{
//swap parent and child
temp = (*heap_obj).arr[parent];
(*heap_obj).arr[parent] = (*heap_obj).arr[child];
(*heap_obj).arr[child] = temp;
child = parent;
parent = child/2;
}
}
int heap_del(heap*heap_obj, int minmax)
{
int temp, temp1, parent, child, child1;
temp = (*heap_obj).arr[1];
(*heap_obj).arr[1] = (*heap_obj).arr[--(*heap_obj).count];
parent = 1;
child = 2;
child1 = 3;
while((child<=(*heap_obj).count)&&(child1<=(*heap_obj).count))
{
if((((*heap_obj).arr[child]<(*heap_obj).arr[child1])&&!minmax)||(((*heap_obj).arr[child]>(*heap_obj).arr[child1])&&minmax))
{
temp1 = (*heap_obj).arr[parent];
(*heap_obj).arr[parent] = (*heap_obj).arr[child];
(*heap_obj).arr[child] = temp1;
parent = child;
}
else //if(((*heap_obj).min[child1]<(*heap_obj).min[child])&&!minmax)
{
temp1 = (*heap_obj).arr[parent];
(*heap_obj).arr[parent] = (*heap_obj).arr[child1];
(*heap_obj).arr[child1] = temp1;
parent = child1;
}
child = 2*parent;
child1 = 2*parent + 1;
}
if(child <=(*heap_obj).count)
(*heap_obj).arr[parent] = (*heap_obj).arr[child];
return temp;
}
float minmaxheap(heap*hmax, heap*hmin, int val)
{
int hmaxtop, hmintop, temp;
hmaxtop = htop(hmax);
hmintop = htop(hmin);
if(val<hmaxtop)
heap_ins(hmax, val,1);
else
heap_ins(hmin, val,0);
if(abs((*hmax).count - (*hmin).count)==2)
{
if((*hmax).count>(*hmin).count)
{
temp = heap_del(hmax, 1);
heap_ins(hmin, temp, 0);
}
else
{
temp = heap_del(hmin, 0);
heap_ins(hmax, temp, 1);
}
}
int total;
total = (*hmax).count + (*hmin).count;
if(total%2!=0)
{
if((*hmax).count>(*hmin).count)
return htop(hmax);
else
return htop(hmin);
}
else
return (htop(hmax)+htop(hmin))/2.0;
}
int htop(heap*hp)
{
return (*hp).arr[1];
}
void printarr(int arr[], int l)
{
int i;
for(i=1;i<l;i++)
printf("%d ",arr[i]);
printf("\n");
}
To find a median of stream of integers we can use the concept of heaps:
Insert the first element in either of the below mentioned heaps and this number is the current median. Then compare the second incoming number with the first one. If the number is more then insert it in the HeapLow and if it less then insert it in the HeapMax.
Keep two heaps and keep on adding the numbers in these heaps as they come. If the numbers in the two heaps are even then you just take the average of the roots of the two heaps. Otherwise if they are unenven and difference is one in both the heaps take the number from the heap which has a higher count. If difference between their counts is more than one then take a number from the heap which has more elements and insert it into the another heap. And then find the median.
1. HeapLow: This heap is the max heap of the smaller of the numbers in the input stream so far
2. HeapMin: This heap is the min heap of the maximum of the numbers in the input stream so far
These are the two heaps to consider while finding the median.
May be a stupid question - are you assuming that all the the numbers are available at the same time?
I ask because the question uses the word "stream" which suggests to me that we need to compute the median of the numbers available to us, and then recompute it as and when the user/system supplies more numbers
I suspect that your array-based heap solution may not scale well in such situation. What do you think?
This is a good solution, however, since the question mentioned "a stream", I'd imagine you can't sort an array because there isn't an array to start with.
Nevertheless, I think if we modify this a little bit, the solution would still work.
My proposed solution:
Have two heaps: min and max heaps with a running count of the total number of items contained in each heap, let's name them countFromMinHeap, and countFromMaxHeap. Additionally, we need to have a variable to keep count of the current median of all items we've seen so far.
When the first item arrives in the stream, we insert that into either one of the heaps -- it doesn't matter. At that point, we increment the count for the heap we inserted into, and then we set the current median to the number we just received.
A second number arrives, and we compare that with the current median. If the incoming number is less than the current median, we insert it into the max heap. If it is more than, then we insert it into the min heap. We update the counts for each heap, and then here's the interesting part: the counts should always be only 1 apart.
If the counts are the same, that means we have seen an even number of integers, and we take the average from the top of the two heaps. If they are uneven, we take the top from the heap that has a higher count. If they are more than 1 item apart, we take the top from the heap that has more items, and insert that into the other heap.
Using such a method, we can keep a running track of our current median, and the time efficiency for such an operation is O(nlogn) for the heapify, and O(1) for getting the median. Space efficiency is O(n), where n is the number of integers in the stream.
Vgeek, I think you mixed something up. If you had sorted an array with the numbers, the median is just the middle element(s) (if n is odd/even).
The usual answer with 2 heaps was given by Zhia above.
As the input is coming as part of a stream, we need a way to save the input so that median can be found when the query is made.
I can think of following logic for this problem.
This problem can be solved with a Binary Search Tree and an integer keeping count of number of elements received so far.
1. Insert every incoming element in the Binary Search Tree. Using self balancing tree like AVL tree or Red-Black BST is even better option.
Worst Case Time complexity: O(N)
2. Also every time a new element is received and added to the BST, increment the input counter (which keeps track of number of inputs so far) by 1.
Time complexity: O(1)
3. When the query is made for median, perform an inorder traversal of the BST and stop when the in order element count is n/2.
The median is, if the elements received so far are odd, the median is n/2th element. If the elements received so far are even, then the median is (n-1 th element) + (n th element) / 2
Time complexity: O(N)
================
Total Time complexity: O(N)
One critical modification we will have to do is if a duplicate element is received, insert method in the BST can add it as a left of a right child of the same element received earlier. This way we correctly calculate the median if there are duplicates in the input.
i think i agree your appraoch is it not good if we keep a counter at each node that how many element r in left of this and how many r on right of this ...in that case i think lookup is more faster and whole tree inorder we not need to do ......
but u see if we use a tree based solution we need extra space O(n) i think nobody will agree to provide this much space......can u think in terms of min heap max heap......
i code this problem using max heap min heap but my solution was not giving correct result.....
Hi Kavita
In the min and max heap as well, we will have to keep an array with all the elements in it to find min and max value among them. Basically since we are getting values in the stream manner, we will have to store them somewhere whether its a BST or an array.
So in the heap solution as well we end up using O(N) space for storing those elements.
The advantage of this solution over Heap is we dont need any heapify process and the heap solution is O(N logN) in time. This one is in O(N).
Also just to clarify, the additional element to keep the count of incoming numbers is not going to be at each node level. Its separate from the BST.
This solution isn't O(N). Inserting N values into a balanced BST takes O(N log N).
In the heap solution, query for median takes O(1), for you it takes O(n). I don't think there is advantage over a heap solution here, only disadvantage.
Anonymous
Why will insert into BST be O(N log N), in the worst case when the binary search tree becomes a skinny tree leaning only to 1 side, you will have to travel at the max N elements on one side of the tree to insert the last element. Worst case of BST is always O(N).
Please refer to the wikipedia link on BST if you want to know the time complexity for each operation
Average Worst case
Space O(n) O(n)
Search O(log n) O(n)
Insert O(log n) O(n)
Delete O(log n) O(n)
Also finding median in heap takes O(1) no doubt about it but you are not considering the heapify process which happens when a new element is added to the heap. New element might end up moving from last array index to the 0th index to become root of the tree. The entire operation is not O(1).
Ah ha .. I get it, you are talking about inserting N elements together would be N Log N. Yes that is true.
But then for heap as well, we are saving all the incoming elements and heapifying them as soon as they are inserted. So for N elements this time is also exactly O(N Log N), I dont really see any difference.
Also regarding calculating the heap, in the worst case when one heap gets all the elements and the other heap gets only one, you end up moving elements from larger heap to smaller heap before calculating the heap. So worst case operation is O(N) here.
The two heaps never differ by more than 1 in the number of elements they have. You can therefore derive an O(log n) runtime for adding a new element. The insert takes O(log n), and you have to move at most 1 element from one heap to the other, which also takes O(log n). Querying the median is always just a matter of looking at one or both of the roots, and is O(1). There are no O(N) operations here.
int get_median(int next_number, Heap& min_heap, Heap& max_heap) {
if (max_heap.size() && next_number < max_heap.top()) {
max_heap.enqueue(next_number);
if (max_heap.size() > min_heap.size()) {
min_heap.enqueue(max_heap.dequeue());
}
} else {
min_heap.enqueue(next_number);
if (min_heap.size() > max_heap.size()+1) {
max_heap.enqueue(min_heap.dequeue());
}
}
if (max_heap.size()==min_heap.size()) {
return (max_heap.top() + min_heap.top())/2;
} else {
return min_heap.top();
}
}
A balanced BST can be used as well such as AVL. Since the no. of the left and the right child differ by 1, root will be the median.
If both the right and left subtree are same then root is the median
If the right subtree is 1 more then avg of root and immediate right child of root.
Same for left subtree.
1. take the stream
2. point to the first index
3. double the index and check that it goes outof boundary or not if not than
4. go to the next index double it again and do the same process
in this way you will get the median with O(n/2) order
e,g stream = 1654278
-> 1st index = 0 and value = 1
-> double index 0*2=0 and check if 0 is out of boundary if not
-> than go for index 1;
-> now index = 1 and value = 6
-> double index = 1*2 = 2 and check index[2] = not out of boundary
-> index 2 value = 5
-> double index = 2*2 = 4 check index[4] = no
-> index = 3 and value = 4
-> double index = 3*2 = 6 check index[6] = no
-> index = 4 and value = 2
-> double index = 4*2 = 8 check index[8] = invalid
therefor media = index[3]
Use max heap and min heap. Insert the numbers in max and min heap alternatively. But if the number to be inserted in max heap is greater than minimum of the min heap, then add the top element of min heap to max heap, and the new number at the top of min heap and heapify it. Similarly for the number being inserted in min heap is less than max element in max heap, then insert the max number of max heap in min heap, and then put the new number at the top of max heap & heapify it. At any time all the numbers in max heap will be less than equal to all number all number in the min heap. And difference in number of element in max and min heap will be atmost one, in which case the top element in max heap is the median, else the average of top of max and min heap.
I do think insertion sort is the best for this case.
insertion sorting is very efficient for a substantially sorted array (O(n+d)), which is exactly the case of this problem.
using heap, every new data coming, it's necessary to re-heap, which, as I recall, is O(nlogn), worse then insertion sorting for this specific case
#This can be done without sorting using this method based on quicksort.partitioning.
ints = [3,5,7,13,1,44, 34, 2,17,19,6,44,4,67,8,67,99,8,44,45,1,11,2,174,12,9];
med = (len(ints)+1)/2 -1;
def part(ary, l, r):
if l >= r :
return l;
else:
i = l;
v = ary[i];
l = l+1;
while (True):
while(v > ary[l] and l < r) : l = l+1;
while(v <= ary[r] and r >= l) : r = r-1;
if(l >= r ):
break;
ary[r], ary[l] = ary[l] , ary[r];
ary[i], ary[r] = ary[r], ary[i];
return r;
def median(ary, l, r):
axis = part (ary, l, r);
while ( axis != med):
if(axis > med):
r = axis-l;
axis = part (ary, l, r);
else:
l = axis+1
axis = part (ary, l, r);
return ary[axis];
print median(ints, 0, len(ints)-1);
start with keeping two pointers .If the list is odd,use one pointer and point to middle element.If the list size is even keep two pointers and point to middle two.now whenever new element comes just check the size of the list whether it is even or odd and accordingly move the pointers and calculate median
#include <iostream>
using namespace std;
#include <set>
int main() {
set<int> md;
// your code goes here
int values[] = {10,5,6,3,0,14,33,78,4,15,8,100,7};
int count = sizeof(values)/sizeof(values[0]);
for(int i =0; i < count; i++) {
md.insert(values[i]);
int to_find = (i)/2;
int median = 0;
set<int>::iterator it = md.begin();
{
// odd, the
for(int in = 0; in <to_find; in++) {
++it;
}
median = *it;
}
if (!((i+1)%2)) {
// if this is even; then find the to_find+1;
++it;
median = (median + (*it))/2;
}
for(it = md.begin(); it != md.end(); ++it) {
cout << *it << "\t";
}
cout << "\nMedian is " << median <<"\n\n";
}
return 0;
}
It is O(logn, for insertion into binary tree) + O(n, for search) = O(n) for each median location
#include <iostream>
using namespace std;
#include <set>
#include<queue>
#include <algorithm>
#include <cmath>
void do_balance(priority_queue<int, vector<int>, std::greater<int> > &l,
priority_queue<int> &r) {
if(abs(l.size()-r.size()) > 1) {
if(l.size() > r.size()) {
// get from l and insert into r.
int v = l.top();
r.push(v);
l.pop();
} else {
int v = r.top();
l.push(v);
r.pop();
}
}
if(abs(l.size() - r.size()) > 1) {
cout << "Invalid";
}
}
void print_median(const priority_queue<int, vector<int>, std::greater<int> > &l,
const priority_queue<int> &r) {
if(l.size() == r.size()) {
cout << (l.top() + r.top())/2 << "\n";
return;
}
int v= (l.size()>r.size())? l.top(): r.top();
cout << v <<"\n";
}
void find_median() {
int ara[] = {5,1,3,2,5,4,6,8};
int sz = sizeof(ara)/sizeof(ara[0]);
priority_queue<int> minq;
priority_queue<int, vector<int>, std::greater<int> > maxq;
maxq.push(ara[0]);
cout << "\n\nVia priority Queue \n";
cout << ara[0] <<"\n";
for(int i = 1; i < sz; i++) {
if (ara[i] > maxq.top()) {
minq.push(ara[i]);
} else {
maxq.push(ara[i]);
}
do_balance(maxq, minq);
print_median(maxq, minq);
}
}
int main() {
set<int> md;
// your code goes here
int values[] = {10,5,6,3,0,14,33,78,4,15,8,100,7};
int count = sizeof(values)/sizeof(values[0]);
for(int i =0; i < count; i++) {
md.insert(values[i]);
int to_find = (i)/2;
int median = 0;
set<int>::iterator it = md.begin();
{
// odd, the
for(int in = 0; in <to_find; in++) {
++it;
}
median = *it;
}
if (!((i+1)%2)) {
// if this is even; then find the to_find+1;
++it;
median = (median + (*it))/2;
}
for(it = md.begin(); it != md.end(); ++it) {
cout << *it << "\t";
}
cout << "\nMedian is " << median <<"\n\n";
}
find_median();
return 0;
}
what if keep track of quantity and sum
delta = sum/count/count
if value < median { median -= delta} else { median += delta}
I think based on sum and quantity u can get mean not median.
Suppose input is 13, 18, 13, 14, 13, 16, 14, 21, 13
Then find out median sort it 13, 13, 13, 13, 14, 14, 16, 18, 21
Now if i see 13, 13, 13, 13, =< 14, >=14, 16, 18, 21 so here median is 14 bcoz here 4 elements are smaller or equal to 14 and exactly 4 element higher than this
Correct me if i am wrong...
usng 2 heaps
- Anonymous July 08, 2014geeksforgeeks.org/median-of-stream-of-integers-running-integers/