Microsoft Interview Question
SDE1sCountry: United States
Interview Type: Phone Interview
Another way for creating function Split_fn is:
CREATE FUNCTION Split_fn
( @string varchar(max),
@delimiter char(1) )
RETURNS TABLE
AS
RETURN
(
WITH tokens(p, a, b)
AS
( SELECT 1, CAST(1 AS bigint),
CHARINDEX(@delimiter, @string)
UNION ALL
SELECT p + 1, b + 1,
CHARINDEX(@delimiter, @string, b + 1)
FROM tokens
WHERE b > 0 )
SELECT p AS ItemIndex,
SUBSTRING(@string, a,
CASE WHEN b > 0 THEN b-a ELSE LEN(@string) END)
AS Items
FROM tokens
);
GO
int findDuplicates(string list)
{
IDictionary<char, int> kvp = new Dictionary<char,int>();
int counter = 0;
foreach(char character in list.split(","))
{
if(kvp.get(character)!=null)
{
counter++;
}
else
{
kvp.add(character,1);
}
}
return counter;
}
void findDuplicateInStrings(string[] strings)
{
foreach(string s in strings)
{
System.Console.Writeline(findDuplicates(s));
}
}
alter function getDups (@a varchar(20), @b int)
returns @t table
(
a char(2)
, rowss int
)
as
begin
declare @tempTable TABLE (a char(2))
while @b <= (Select len(@a))
begin
insert into @tempTable
select SUBSTRING(@a, @b, 1)
set @b += 1
end
insert into @t
select *, DENSE_RANK() OVER (PARTITION BY a ORDER BY a) as rowss from @tempTable
return
end
-- Execute function by passing desired string to identify number of duplicates ...
select a, (sum(rowss) - 1) as numberOfDups from getDups('AABBBCCCCCC', 1)
group by a, rowss
having count(a) > 1
To get no of duplicate from a table userTab(id,name):
select sum(duplicate) from
(select id, (count(id)-1) as duplicate
from userTab
group by id
having count(id)>1);
TestData:
create table userTab(id number,salary number)
Insert into "userTab" (ID,SALARY) values (1,1000);
Insert into "userTab" (ID,SALARY) values (1,1000);
Insert into "userTab" (ID,SALARY) values (1,1000);
Insert into "userTab" (ID,SALARY) values (2,2000);
Insert into "userTab" (ID,SALARY) values (3,3000);
Insert into "userTab" (ID,SALARY) values (4,4000);
Insert into "userTab" (ID,SALARY) values (4,4000);
Insert into "userTab" (ID,SALARY) values (5,5000);
Insert into "userTab" (ID,SALARY) values (6,6000);
Insert into "userTab" (ID,SALARY) values (6,6000);
Insert into "userTab" (ID,SALARY) values (6,6000);
Insert into "userTab" (ID,SALARY) values (1,1000);
Insert into "userTab" (ID,SALARY) values (1,1000);
table name: tab
Columns:Id,val1,val2,val3,val4,val5
1,a,b,c,a,b
2,a,b,a,a,a
3,c,d,c
create table #tab
(
id int,
val1 varchar(1),
val2 varchar(1),
val3 varchar(1),
val4 varchar(1),
val5 varchar(1)
);
insert into #tab values(1,'a','b','c','a','b')
insert into #tab values(2,'a','b','a','a','a')
insert into #tab values(3,'c','d','c',null,null)
Select id,count(*)-COUNT(distinct val) from
(
Select id,val
from #tab
unpivot(val for valtype in (val1,val2,val3,val4,val5)) unpvt) a
group by id
create table #tab
(
id int,
val1 varchar(1),
val2 varchar(1),
val3 varchar(1),
val4 varchar(1),
val5 varchar(1)
);
insert into #tab values(1,'a','b','c','a','b')
insert into #tab values(2,'a','b','a','a','a')
insert into #tab values(3,'c','d','c',null,null)
Select id,count(*)-COUNT(distinct val) from
(
Select id,val
from #tab
unpivot(val for valtype in (val1,val2,val3,val4,val5)) unpvt) a
group by id
create table #tab
(
id int,
val1 varchar(1),
val2 varchar(1),
val3 varchar(1),
val4 varchar(1),
val5 varchar(1)
);
insert into #tab values(1,'a','b','c','a','b')
insert into #tab values(2,'a','b','a','a','a')
insert into #tab values(3,'c','d','c',null,null)
Select id,count(*)-count(distinct(val)) from
(Select id,val
from #tab
unpivot(val for valtype in (val1,val2,val3,val4,val5)) unpvt) a
group by id
- Assume we don't have strings longer than 256 symbols (actually doesn't matter, we can process any string. Just for performance).
- Lets 'duplicates' is our input table having just a single column str.
Here is a solution for oracle 12 (should work for ms sql. Just remove 'from dual')
WITH
-- let's add some row number to our input table
nrows (id , symb) AS
(
SELECT row_number() over(order by str) AS id, str symb
FROM duplicates
),
-- generate a sequence of numbers: 1, 3, 5...
-- these are char positions of our letters
numbs (id) AS
(
SELECT 1 AS id
FROM dual
UNION ALL
SELECT id + 2 AS id
FROM numbs
WHERE
id < 256
),
-- convert strings to rows
pvt_rows (id, sym) AS
(
SELECT r.id, SUBSTR(r.symb,n.id, 1)
FROM nrows r
INNER JOIN numbs n ON LENGTH(r.symb) >= n.id
),
-- count duplicates of each symbol
preagg(id, sym, cnt) AS
(
SELECT id, sym, COUNT(sym) - 1
FROM pvt_rows
GROUP BY id, sym
ORDER BY id
)
-- final select to output the result
SELECT
id, SUM(cnt) as dups_cnt
FROM preagg
GROUP BY id;
select a,val1,count(val1) as duplicates from
(
SELECT a,
regexp_substr(b,'[^,]+',1,1) as b1,
regexp_substr(b,'[^,]+',1,2) as c,
regexp_substr(b,'[^,]+',1,3) as d,
regexp_substr(b,'[^,]+',1,4) as e,
regexp_substr(b,'[^,]+',1,5) as f
FROM A
)
UNPIVOT
(
val1
for val2 in (b1,c,d,e,f)
)
group by a,val1 having count(val1)>1
order by A;
class Duplicate {
private static int getDuplicates(char arr[]){
int x = 1,y = 0,counter=0;
for (int i=0;i<arr.length;i++){
//To get the number in the range 0-26
//Assuming only capital letters are in the array, we are subtracting by 65
//65 is the ascii value of 'A'
int num = arr[i] - 65;
//Shifting the 1 to as many bits as the value of "num"
x=x<<num;
//Here the particular bt is being added to 'y' by the OR method
if((x & y) == 0 ){
y = x | y;
}
else{
//means there was a duplicate character
counter ++;
}
//because we need to again do the same thing with 'x'. So restoring to the previous state
x=1;
}
return counter;
}
public static void main(String args[]){
char arr[] = {'A','B','C','A','B'};
char arr1[] = {'A','B','A','A','A'};
char arr2[] = {'C','D','C'};
System.out.println(getDuplicates(arr));
System.out.println(getDuplicates(arr1));
System.out.println(getDuplicates(arr2));
}
}
//Just make use of bit shifting. In an integer just make those bits 1 that corresponding to the letter 'A or 'B. And check if a char is a duplicate if the bit for that in the integer is 1 already.
Time O(n)
Space O(1)
public int calcDup(List<String> aListofStringArray)
{
int counter = 0;
for(String aRow : aListOfStringArray)
{
String[] tempArr = aRow.split(",");
Set<String> aSet = Sets.newHashSet();
for(String temp : tempArr )
{
if(aSet.contains(temp))
{
aSet.remove(temp);
counter++;
}
else
{
aSet.add(temp);
}
}
}
return counter;
}
Besides that trivial approach where we can keep an array of 26 letters, there is one thing which we can do is to re-use the provided array.
i.e.,
Case 2: A, B, A, A, A
Could become 3, B, A, A, A as we parse through the array or we can replace A (or the character we are counting in the loop) with any random character
//pseudo code
while N
index = index of first legit character
char = char we are looping at
count of that char = 0
for all array
count ++ if char = array[current_id]
push in map (char, count)
At the end of iterations we will have a map with all characters with their respective count
The array will have all wild_character
package com.careercup;
import java.util.HashMap;
import java.util.Map;
public class HelloWorld {
public static void main(String[] args) {
HashMap<String, Integer> map = new HashMap<String, Integer>();
//for convenience, I am taking space separated input from command line rather comma separated
for (int i = 0; i < args.length; i++) {
String ch = args[i];
Integer val = map.get(ch);
if (val == null){
map.put (ch, new Integer(1));
}
else {
map.put (ch, new Integer(val.intValue() + 1));
}
}
int charRepeated = 0;
for (Map.Entry<String, Integer> entry : map.entrySet()){
charRepeated += entry.getValue().intValue() - 1;
}
System.out.println ("Repeated Characters: " + charRepeated);
}
}
class Program
{
static int CalcDuplicates(string s)
{
char[] arr = s.ToArray();
int index = 0, counter = 0;
foreach (char c in s)
{
index++;
if (c == ' ') continue;
for (int i = index; i < s.Length; i++)
{
if (c == arr[i])
{
arr[i] = ' ';
counter++;
}
}
}
return counter;
}
static void Main(string[] args)
{
Console.WriteLine(CalcDuplicates("ABCAB").ToString());
Console.WriteLine(CalcDuplicates("ABAAA").ToString());
Console.WriteLine(CalcDuplicates("CDC").ToString());
}
I propose my solution:
Function that split the comma separated items and returns them as table rows.
Procedure that calls the function and adds the items into a table as separate rows with identifier.
Execution of the procedure:
Query that counts occurrences of items and returns those that are repeated more than once.
Finally, the query that sums duplicates by identifiers.
The result is:
- cvetygeorg August 18, 2014ListID SumOfDuplicates
----------- ---------------
1 2
2 3
3 1