Amazon Interview Question
Software Engineer in TestsCountry: India
Interview Type: In-Person
It should be
DELETE FROM CTE WHERE TAB_ROW<>1
otherwise only the second entry will be deleted and not the thirs one like in case of 3 5
//Approach 1
1. check whether the data is huge, ie if the size of the input file more than the JVM heap size,
split the input into chunks , which can fit into memory
2. Sort the data in the chunk using merge sort (optional)
3. use the code to remove the duplicates.
public void approach1()
{
try {
BufferedReader br=new BufferedReader(new FileReader(new File("filepath")));
try {
Set<String> set=new HashSet<String>();
String line=null;
while((line=br.readLine())!=null)
{
set.add(line);
}
} catch (IOException e) {
e.printStackTrace();
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
- Create a mapping between entry (string?) and int
- Go through the list and for each increment the int by 1 so at the end you have a count of everything
- Go through the mapping you made and for each if there is only 1, print it out, if there is more than 1 print it out the number in the mapping minus 1
public string[] filter(string[] everything)
{
if (everything == null || everything.length < 2) { return everything; }
HashMap<string,int> counts = new HashMap<string,int>();
for (int i = 0 ; i < everything.length; i++)
{
if (counts.containsKey(everything[i]))
{
counts.put(everything[i],counts.get(everything[i]) + 1);
}
else {
counts.put(everything[i], 1);
}
}
List<string> finalList = new ArrayList<string>();
for (Map.Entry<string, int> entry : map.entrySet()) {
int times = entry.getValue();
if (times == 1) {
finalList.add(entry.getKey());
}
else {
for (int i = 0; i < times - 1; i++)
{
finalList.add(entry.getKey());
}
}
}
string[] arrayFinal = new string[finalList.size()];
finalList.toArray(arrayFinal);
return arrayFinal;
}
Is this problem driven by any real world problem? I don't understand the requirement of deleting just one duplicate record.
First of all why n the sample output we have
3 5
3 5
I guess there should be only one of them.
This is classic duplicate removal algorithm with a little tweak.
Use a HashMap ..
Iteration over the relational structure (2D array in fact)
1. Create key as "col1+col2+..+coln" - a string
2. Check if the key is already there in the map
3. If yes - delete the current row (this can be done in multiple way - first step could be just marking e.g keeping track of the row to be deleted or setting all values as null.
in second second actually remove that row by not coping such a marked row)
4. if No - add this key in the map
5. do same till all rows are visited.
Then remove the marked rows as mentioned above as second step.
Some more simplified...
import java.io.*;
import java.util.Arrays;
public class Task3 {
public static void main(String args[])
{
String m[] = {"13","23","13","24","23", "13", "24","24"};
String n[] = new String[10];
n[0] = "0";
Arrays.sort(m);
for (int i = 0; i <m.length-1; i++)
{
if ((m[i] == m[i+1]) )
{
n[i] = m[i]; //copy the elements if the next element is same as previous!
//System.out.println("duplicate item "+m[i+1]+" at Location"+(i+1) );
}
//taking the last element!
if(i+1 == m.length)
{
n[i+1]=m[i+1];
}
continue;
}
//printing the elements which removes one duplicate element!
for(int j=0;j<n.length;j++)
{
if(n[j]!=null)
{
System.out.println(n[j]);
}
}
}
}
SELECT "a1", "a2"
FROM TableName
GROUP BY "a1", "a2"
HAVING COUNT(*) > 1 UNION
SELECT "a1", "a2"
FROM TableName
GROUP BY "a1", "a2"
HAVING COUNT(*) = 1
Nope. The group by acts like a 'distinct' operator and you will only get back 1 row for each. Having 2 queries is redundant since your are really doing the same thing whether or not count == 1 or count > 1.
This is pretty much what you wrote, which is note correct.
SELECT a1, a2
FROM tx
GROUP BY a1, a2
public static void removeDuplicate() {
String[] elem = { "1 3", "1 3", "2 4", "3 5", "3 5", "3 5", "1 1",
"1 1", "1 1", "1 1" };
Map<String, Integer> map = new HashMap<String, Integer>();
for (String str : elem) {
if (map.containsKey(str))
map.put(str, map.get(str) + 1);
else
map.put(str, 1);
}
for (String str : elem) {
if (map.containsKey(str)) {
if (map.get(str) == 2) {
System.out.println(str);
map.put(str, null);
} else {
System.out.println(str);
map.put(str, map.get(str) - 1);
}
}
}
}
there was one mistake in able code
here is the working code
public static void removeDuplicate() {
String[] elem = { "1 3", "1 3", "2 4", "3 5", "3 5", "3 5", "1 1",
"1 1", "1 1", "1 1" };
Map<String, Integer> map = new HashMap<String, Integer>();
for (String str : elem) {
if (map.containsKey(str))
map.put(str, map.get(str) + 1);
else
map.put(str, 1);
}
for (String str : elem) {
if (map.containsKey(str)) {
if (map.get(str) == 2) {
System.out.println(str);
map.remove(str);
} else {
System.out.println(str);
map.put(str, map.get(str) - 1);
}
}
}
}
String[] inputs={"1 3","1 3","2 4","3 5","3 5","3 5","1 1","1 1","1 1","1 1"};
Set<String> deletedElements=new HashSet<String>();
for(int i=1;i<inputs.length;i++)
{
for(int j=i-1;j>=0;j--)
{
if(inputs[i]!=null&&inputs[i].equals(inputs[j])&&!deletedElements.contains(inputs[j]))
{
for(int k=j;k<inputs.length-1;k++)
{
inputs[k]=inputs[k+1];
if(k+1==inputs.length-1)
inputs[k+1]=null;
}
deletedElements.add(inputs[j]);
}
}
}
You want to count the rows and restart the count everytime the rows change.
Filter out tgt==2 because you will only have tgt == 2 when you have a row duplicated at least once.
in oracle:
SELECT A1,A2
FROM
(
SELECT
A1
, A2
,ROW_NUMBER() OVER (PARTITION BY A1,A2 ORDER BY A1,A2) tgt
FROM TX
)
WHERE tgt <> 2
public class RemoveOneDuplicate {
public static void main(String[] args) {
String[] elem={"1 3","1 3","2 4","3 5","3 5","3 5","1 1","1 1","1 1","1 1"};
List<String> al=new ArrayList<String>(); // to get the final string
al.addAll(Arrays.asList(elem));
System.out.println("Original array list: "+al);
HashMap<String,Integer> hm=new HashMap<String, Integer>();
for(int j=0;j<elem.length;j++)
{
if(hm.containsKey(elem[j]))
{
int k=hm.get(elem[j]);
k=k+1;
hm.remove(elem[j]);
hm.put(elem[j],k);
}
else
{
hm.put(elem[j], 1);
}
}
Set s=hm.entrySet();
Iterator it=s.iterator();
while(it.hasNext())
{
Map.Entry pairs=(Map.Entry)it.next();
//System.out.println(pairs.getKey()+"----------------"+pairs.getValue());
int n=((Integer) pairs.getValue()).intValue();
if(n >= 2)
{
n--;
pairs.setValue(n);
al.remove((String) pairs.getKey());
}
//System.out.println("### "+s);
}
System.out.println("Hashmap : "+hm);
Object obj[]=al.toArray();
System.out.println("FINAL STRING"+Arrays.toString(obj));
}
}
SELECT S.a1, S.a2 FROM (SELECT a1, rank FROM (SELECT a1, @rank:=@rank+1 AS rank FROM table, (SELECT @rank:=0)r AS R) AS S) JOIN (SELECT a1, rank FROM (SELECT a1, @rank:=@rank+1 AS rank FROM table, (SELECT @rank:=0)r AS U) AS T) ON S.rank = T.rank+1 AND S.a1 = T.a1 AND S.a2=T.a2 UNION SELECT a1, a2 FROM table GROUP BY a1, a2 HAVING COUNT(*)=1
---------------------------------
The idea is first put a rank id number for each row:
rank a1, a2
1, 1, 3
2, 1, 3
3, 2, 4
......
Then self join the table ON table1.rank=table2.rank+1, table1.a1 = table2.a1, table1.a2 = table2.a2. This will just remove 1 duplicate for those (a1, a2) with more than 1 count.
Finally, Union the above results with rows only contain no duplicate pairs (a1, a2)
Try this code.It removes one duplicate exactly
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
public class Sample {
public static void main(String[] args) {
String[] elem={"1 3","1 3","2 4","3 5","3 5","3 5","1 1","1 1","1 1","1 1"};
int ml=1;
HashMap<String,Integer> hm=new HashMap<String, Integer>();
for(int j=0;j<elem.length;j++)
{
if(hm.containsKey(elem[j]))
{
System.out.println("inside for loop"+elem[j]);
int k= hm.get(elem[j]);
k=k+1;
hm.remove(elem[j]);
hm.put(elem[j],k);
System.out.println("inside for count of "+elem[j]+" :"+k);
}
else
{
hm.put(elem[j], 1);
System.out.println("inside for loop else"+elem[j]);
}
}
Iterator it=hm.entrySet().iterator();
while(it.hasNext())
{
Map.Entry pairs=(Map.Entry)it.next();
int n=Integer.parseInt(pairs.getValue().toString());
System.out.println(pairs.getKey());
for(int k=2;k<n;k++)
System.out.println(pairs.getKey());
it.remove();
}
}
}
create table A (a1 int,a2 int)
- Ayushi Ag June 08, 2014insert into A values(1,3),(1,3),(2,4),(3,5),(3,5),(3,5);
WITH CTE AS(
SELECT *,ROW_NUMBER() OVER(PARTITION BY A1 ORDER BY A2 DESC) AS TAB_ROW
FROM A)
DELETE FROM CTE WHERE TAB_ROW=2