Google Interview Question
Software Engineer / Developersukkonen could do that in O(n)!
google the algorithm (Ukkonen suffix tree online).
the only change to do = count leaf nodes during tree building process
The following is my Java implementation with a simple test.
class Node
{
public Node[] childs = null;
}
class UniqueRows
{
public static int[][] uniqueRows(int[][] matrix)
{
int N = matrix.length;
boolean[] isUnique = new boolean[N];
int uniqueCount = 0;
Node root = new Node();
for (int i = 0; i < N; i++)
{
Node current = root;
for (int j = 0; j < N; j++)
{
int index = matrix[i][j];
if (null == current.childs) {current.childs = new Node[2];}
if (null == current.childs[index])
{
current.childs[index] = new Node();
if ((N - 1) == j)
{
isUnique[i] = true;
uniqueCount++;
}
}
current = current.childs[index];
}
}
int[][] result = new int[uniqueCount][];
int resultCount = 0;
for (int i = 0; i < N; i++)
{
if (isUnique[i])
{
result[resultCount] = matrix[i];
resultCount++;
}
}
return result;
}
public static void main(String[] args)
{
int[][] matrix1 = {
{0, 1, 0, 0, 1},
{1, 0, 1, 1, 0},
{0, 1, 0, 0, 1},
{1, 1, 1, 0, 0}};
debug(uniqueRows(matrix1));
}
public static void debug(int[][] array) {debug(array, " ");}
public static void debug(int[][] array, String separator)
{
for (int i = 0; i < array.length; i++) {debug(array[i], separator);}
}
public static void debug(int[] array, String separator)
{
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < array.length - 1; i++) {buffer.append(array[i] + separator);}
if (array.length > 0) {buffer.append(array[array.length - 1]);}
System.out.println(buffer.toString());
}
}
Above code works great, just a small error with indexing at the second loop. Thus I corrected it the post the code again.
public class DuplicateRowsInBinaryMatrix {
public int[][] getUniqueRows(int[][] matrix) {
int m = matrix.length;
int n = matrix[0].length;
TrieNode root = new TrieNode();
TrieNode p;
int uniqueCount = 0;
boolean[] isUnique = new boolean[m];
// isUnique is used to mark the lines that would appear in final result
// start to build the trie
for (int i = 0; i < m; i++) {
// insert number matrix[i][] into the trie
p = root;
// root element would be an empty heading for all numbers
for (int j = 0; j < n; j++) {
int digit = matrix[i][j];
if (p.kids == null) {
p.kids = new TrieNode[2];
}
if (p.kids[digit] == null) {
// this is a whole new branch, create a new node here
p.kids[digit] = new TrieNode();
if (j == n - 1) {
uniqueCount++;
isUnique[i] = true;
}
}
p = p.kids[digit];
}
}
System.out.println("uniqueCount is " + uniqueCount);
int[][] result = new int[uniqueCount][];
int k = 0;
for (int w = 0; w < isUnique.length; w++) {
if (isUnique[w]) {
result[k++] = matrix[w];
}
}
return result;
}
class TrieNode {
TrieNode[] kids = null;
}
public static void main(String[] args) {
DuplicateRowsInBinaryMatrix ins = new DuplicateRowsInBinaryMatrix();
int[][] matrix1 = { { 1, 0, 0, 0, 0 }, { 1, 0, 0, 0, 1 },
{ 1, 0, 0, 1, 1 }, { 1, 0, 0, 1, 1 }, { 1, 0, 0, 1, 1 },
{ 1, 0, 0, 0, 1 } };
printResult(ins.getUniqueRows(matrix1), " ");
}
public static void printResult(int[][] array, String separator) {
for (int[] line : array) {
for (Integer i : line) {
System.out.print(i + " ");
}
System.out.println();
}
}
}
Use radix sort. Each column needs O(n) time to sort, total O(n^2) time for all columns. Then make one final pass to detect and remove duplicates (also O(n^2) time).
#include <iostream>
#include <algorithm>
#define MAX 1000
using namespace std;
struct cmp{
int index;
cmp():index(0){}
cmp(int i):index(i){}
bool operator()(int a[], int b[])
{
return a[index] < b[index];
}
};
void unique(int** array, int n)
{
for (int i = n - 1; i >= 0; --i)
{
cmp o(i);
sort(array, array + n, o);
}
int pre[1000];
for (int i = 0; i < n; ++i)
{
pre[i] = array[0][i];
cout << pre[i] << " ";
}
cout << endl;
for (int i = 1; i < n; ++i)
{
int j;
for (j = 0; j < n && pre[j] == array[i][j]; ++j)
{
}
if (j == n)
{
continue;
}
for (int j = 0; j < n; ++j)
{
cout << array[i][j] << " ";
pre[j] = array[i][j];
}
cout << endl;
}
}
int main()
{
int n;
cin >> n;
int **array = new int*[n];
for (int i = 0; i < n; ++i)
{
array[i] = new int[n];
}
for (int i = 0; i < n; ++i)
{
for (int j = 0; j < n; ++j)
{
cin >> array[i][j];
}
}
unique(array, n);
return 0;
}
I utilized hashing function to represent each row and eliminate duplicate row.
import java.util.HashSet;
public class Test
{
public static void main(String[] arg)
{
int[][] arrays = new int[][]
{
{0,1,0,0,1},
{1,0,1,1,1},
{0,1,0,0,1},
{1,1,1,0,0},
};
printUniqueRows(arrays);
// Output
// ---------
// 0 1 0 0 1
// 1 0 1 1 1
// 1 1 1 0 0
}
private static void printUniqueRows(int[][] arrays)
{
HashSet<Integer> set = new HashSet<Integer>();
for(int i=0; i< arrays.length; i++)
{
int[] row = arrays[i];
//Unique identifier
int binaryToDecimal = 0;
for(int j=0; j< row.length; j++)
{
if(row[j] == 1)
{
binaryToDecimal += Math.pow(2, (row.length-1) -j);
}
}
if(false == set.contains(binaryToDecimal))
{
set.add(binaryToDecimal);
//Print
for(int el : row)
{
System.out.print(el + " ");
}
System.out.println();
}
}
}
}
Find the value of each row, as in 2^(n-1)*a[0][i]+2^(n-2)*a[1][i]+...+2^(0)*a[n][i]
then compare values and write rows which have difft values
basically you are hashing the values. But then comparing those values takes the major time.. O(nlogn) bec we'll have to sort the hashed values.
We can use the hashing scheme as you suggested or simply sort them directly as string.. we can just directly sort in O(nlogn) n then print unique values in O(n) times..
TO find the hash u need to read all the digits in a row for all rows that means total digits accessed is n^2.Hence O(n^2)
@ishan sorry didnt get u , can u plz explian with example how u came to this fact "Find the value of each row, as in 2^(n-1)*a[0][i]+2^(n-2)*a[1][i]+...+2^(0)*a[n][i]
then compare values and write rows which have difft values" & also u r saying to add row by but u r adding column by column as a[0][i]+a[1][i]...a[n][i]..please reply asap..
Thanks in advance
@all sorry didnt get u , can u plz explian with example how u came to this fact "Find the value of each row, as in 2^(n-1)*a[0][i]+2^(n-2)*a[1][i]+...+2^(0)*a[n][i]
then compare values and write rows which have difft values" & also u r saying to add row by but u r adding column by column as a[0][i]+a[1][i]...a[n][i]..please reply asap..
Thanks in advance
BinaryTree
{
int info ;
int *left ;
int *right ;
vector<int> rowNoVec; // Row Number it belongs to ,same path can belong to more than one row
};
Vector<BinaryTree> VectorofTree;
// We need to have vector of n trees root Number 1 to N.
//Working above data structure , leaf Node we have information how many row it belongs, we can delete them.
We can't solve this problem quickly than O(n * n), because we must at least review all digits.
My solution is create HashSet then do cycle for each row and calculate its hash if HashSet not contain current hash we write this row and add it to HashSet.
can you please elaborate please how to store the above data in HashSet, do you mean store hashes of each element?
if that's the case collisions might occur
Also constructing a trie if N is not very big is not so optimal I guess ?
How much is optimization important in interviews ? Anyone ?
Thanks
S
Constructing a TRIE will take O(log(n)) for every string in the average case and O(n) for the worst case.
So total time taken will be O(n*log(n)) in average case and O(n^2) for worst case
My bad. Looking up for a binary string in a TRIE will take O(n) time and not O(logn). So the whole operation will be O(n^2).
Here is my code for this..taking N=5 here for example
#include<stdio.h>
#define N 5
int main()
{
int a[N]={0,0,0,0,0},b[N]={-1,-1,-1,-1,-1},p,i,j;
int A[][N] = {
{1,1,0,0,1},
{1,1,0,0,1},
{1,0,0,0,1},
{1,1,0,0,1},
{1,0,0,0,0}
};
// a=(int *)malloc(5 * sizeof(int));
for(i=0;i<N;i++)
{for(j=N-1;j>=0;j--)
{if(A[i][j] == 1)
{a[i] += pow(2,(N-1)-(j));}
}//printf("%d\n",a[i]);
}
//b=(int *)malloc(5 * sizeof(int));
// b={-1,-1,-1,-1,-1};
for(i=0;i<N;i++)
{
j=a[i] % N;
p=0;
while(b[j] != -1)
{
if(b[j] != a[i])
j = (j+1)%N;
else
{
p=1;
break;
}
}
if(p!=1)
{
for(p=0;p<N;p++)
printf("%d ",A[i][p]);
b[j] = a[i];
printf("\n");
}
}
getch();
return 0;
}
Computing decimal equivalent takes O(n^2) time and so does hashing..
@abhi algorithm to rahul
he is computing the hash of each of the rows by converting them to decimal numbers in matrix and saving it a matrix a. He is using hash table array b. he is using hashing with linear probing to check if the the current row value is colliding in the hash table if its colliding he is ignoring that, if not he is printing the row.
If we know that all used integers will be represented on 5bits, than we can have only 2^5=32 possibilities. We can create an array like
boolean [] arr = new boolean[32];
Then go through the given set and change each binary string to integer i, and set
arr[i] = true;
Then you simply loop through the array, and print i (binary version) when
arr[i] == true
If changing binary string into int and reverse takes O(n), than this solution is O(n);
Here is a naive C++/STL style approach that assumes the input is a vector of vector of bools, and creates an output set using the help of a set. The runtime is O(n^2 * log n). The reason for the log n portion of because doing a find() on a set takes logarithmic time. This is not as efficient as using a trie.
// we output a collection of the unique rows from matrix
int getNewUniqueRows(vector<vector<bool> >& result,
const vector<vector<bool> >& matrix)
{
result.clear();
set<int> items;
for (vector<vector<bool> >::const_iterator mit = matrix.begin();
mit != matrix.end(); ++mit)
{
const vector<bool>& row = *mit;
int n = findNumber(row);
if (items.find(n) == items.end())
{
result.push_back(row); // note that we COPY the row into result...
items.insert(n);
}
}
return 0;
}
int findNumber(const vector<bool>& row)
{
int number = 0;
for (vector<bool>::const_iterator rit = row.begin();
rit != row.end(); ++rit)
{
number = 2*number;
if (*rit)
{
++number;
}
}
return number;
}
Here is my solution.
1) Read each row of the binary matrix and encode into a string (trivial job)
2) use bitset<N> bs(s), N is the number of rows, s is the string generated in the first step
3) int t=bs.to_ulong(), convert the binary string to int
4) map<int,int> m, m[t]=i; i the index of the row
5) for each row i, perform step 1->3), in step 4, if(m.find(t)==m.end()) m[t]=i;
6) In the end, read through the map and output each corresponding row
Based on the assumption that if two binary strings are the same, its corresponding number is the same and vice versa
#include<iostream>
#include<string>
using namespace std;
class row
{
public:
char str[10];
row * next;
};
class list
{
row* head;
public:
list(){head = NULL;}
void insert(char *input);
void display();
};
void list::insert(char *input)
{
if(head == NULL)
{
row* r = new row();
strcpy(r->str,input);
r->next = NULL;
head = r;
}
else
{
row* start = head;
while(start->next != NULL)
{
if(!strcmp(start->str,input))
return;
else
{
start = start->next;
}
}
if(!start->next)
{
row* r = new row();
strcpy(r->str,input);
r->next = NULL;
start->next = r;
}
}
}
void list::display()
{
if(head == NULL)
{
cout<<"INPUT ROWS YAAR"<<endl;
}
else{
row * start = head;
while(start!=NULL)
{
cout<<start->str<<endl;
start = start->next;
}
}
}
int main()
{
list matrix;
char input[10];
cout<<"INPUT YOUR ROWS"<<endl;
cout<<"TO STOP input x and hit ENTER"<<endl;
while(1)
{
cin.getline(input,10);
if(*input == 'x')break;
matrix.insert(input);
}
cout<<"OUTPUT IS"<<endl;
cout<<endl<<endl;
matrix.display();
system("pause");
return 0;
}
approach similar to singly linked list
instead of node, row is used.
at the time of insertion new character string is compared with already inserted strings
How about, counting the number of 1 bits sets in a row and also the sum of their indexes(whose value is set to 1)
for eg:
0 1 0 0 1
1 0 1 1 0
0 1 0 0 1
1 1 1 0 0
in the above matrix, assume the index starts from 1(instead of 0)
so for the first row, if we sum it up all the 1's we get Count of 2, then if we sum their indexes separately then we get 2+5 = 7
So calculate the two sum values for each row. now the duplicate row will have their two sum values identical.
Here is the step by step calculation:
Row 1 , Count of 1 bit = 2, Index sum = (2+ 5) = 7
Row 2, Count of 1 bit = 3, Index sum = (1+3+4) = 8
Row 3, Count of 1 bit = 2, Index sum = ( 2+5) = 7
Row 4, Count of 1 bit = 3, Index sum = (1+2+3) = 6
From the above results, Row 1 and Row 3 has Identical values so we can consider those two rows are duplicate.
Correct me if i am wrong...
TRIE data structure can work fine for this problem!!!
- Anonymous June 09, 2011