Microsoft Interview Question
Software Engineer InternsCountry: United States
Interview Type: In-Person
Could you please explain your code a bit in exactly how the head and tail are coming into play?
@sid
The head and tail are coming into picture to move all the unique chars to one side.
If you see, at the end of method, chars till tail are being copied and returned.
I think there is a bug in the above code
try
var chars = "ABCEFGABCXYZ".ToCharArray();
removeDuplicates(chars);
public void removeDuplicate(char[] input){
HashMap hm = new HashMap();
int len = input.length;
for(int i = 0; i<len && input[i]!='\u0000';i++){
if(hm.get(input[i]) == null)
hm.put(input[i], i);
else{
for(int j = i;j<len-1;j++){
input[j]=input[j+1];
input[j+1] = '\u0000';
}
i--;
}
}
System.out.println(input);
}
Following code will remove duplicates with no extra space (When ported this solution to c++) in O(N)
class Program
{
static void Main(string[] args)
{
string input = "TestStringhavngm$$$2@@repeatedCharactes ////9(read))";
DuplicateHandler handler = new DuplicateHandler();
Console.WriteLine(handler.RemoveDuplicate(input));
}
}
class DuplicateHandler
{
bool[] charMap = new bool[256];
public DuplicateHandler() {
for (int i = 0; i < 256; i++)
{
charMap[i] = false;
}
}
//This is just to support printable ascii chars
int uniqueCharsRemaining = 94;
private bool isCharFound(char character)
{
var result = true;
var charAsciiValue = (int)character;
if (charAsciiValue < 32 && charAsciiValue > 126)
{
Console.WriteLine("Unsupported chars : {0} ", character);
return true;
}
if (charMap[charAsciiValue] == false)
{
charMap[charAsciiValue] = true;
uniqueCharsRemaining--;
result = false;
}
return result;
}
public string RemoveDuplicate(string input)
{
int rearPtr = input.Length - 1;
char[] inputChars = input.ToCharArray();
var i = 0;
for (; i < inputChars.Length && uniqueCharsRemaining > 0 && rearPtr > i; i++)
{
if (isCharFound(inputChars[i]))
{
while (isCharFound(inputChars[rearPtr]) && rearPtr > i)
{
rearPtr--;
}
if (rearPtr != i)
{
inputChars[i] = inputChars[rearPtr];
}
}
}
return new String(inputChars, 0, --i);
}
}
@Rajasekar,
The solution will not work for a string with only 1 char or 2 repeating chars.
This may fix it:
public string RemoveDuplicate(string input)
{
if (input.Length == 1)
return input;
int rearPtr = input.Length - 1;
char[] inputChars = input.ToCharArray();
var i = 0;
for (; i < inputChars.Length && uniqueCharsRemaining > 0 && rearPtr >= i; i++)
{
if (isCharFound(inputChars[i]))
{
while (isCharFound(inputChars[rearPtr]) && rearPtr >= i)
{
rearPtr--;
}
if (rearPtr != i)
{
inputChars[i] = inputChars[rearPtr];
}
}
}
return new String(inputChars, 0, --i);
}
The code below will remove duplicates from the String array without using extra space in O(n^2) .
1. For each character, check if it is a duplicate of already found characters.
2. Skip duplicate characters and update the non duplicate characters.
package String;
public class RemoveDupciatesInAString {
public static void main(String[] args) {
String s = "Shuhail";
// char[] s = "{'s','h'm";
char[] st = {'s','h','u','h','a','s'};
removeDuplicates(st);
for(char t : st)
System.out.print(t);
// System.out.println(s);
}
public static void removeDuplicates(char[] str) {
if (str == null) return;
int len = str.length;
if (len < 2) return;
int tail = 1;
for (int i = 1; i < len; ++i) {
int j;
for (j = 0; j < tail; ++j) {
if (str[i] == str[j]) break;
}
if (j == tail) {
str[tail] = str[i];
++tail;
}
}
str[tail] = 0;
}
}
Based on my interview experience with Microsoft, using some sort of hash should be allowed. The removal of dupes should be in place though. So, here's C++ solution:
void RemoveDups_Cpp(char *s)
{
bool fBegin = true;
int hash[256];
if (s)
{
// initialize hash
for (int i = 0; i < 256; hash[i++] = 0);
char *src = s;
char *dst = s;
while (*dst)
{
if (hash[*dst] == 0)
{
hash[*dst] += 1;
src++;
dst++;
}
else
{
hash[*dst] += 1;
dst++;
}
*src = *dst;
}
*src = '\0';
printf("Modified string: %s\n", s);
printf("Removed dups: ");
bool fBegin = true;
for (int i = 0; i < 256; i++)
{
if (hash[i] > 1)
{
if (fBegin)
{
printf("%c", i);
fBegin = false;
}
else
{
printf(", %c", i);
}
}
}
}
}
Sort array lexicographically with mergesort for O(nlogn) then remove duplicate neighbors for O(n).
Pseudo Code:
for(i=1 to strlen(str))
{
if(str[i]!=str[0])
{
for(j=i+1 to strlen(str)
{
if(str[i]==str[j])
str[j]=str[0]; //basically a marker so that we know the indices containing duplicate characters.
}
}
}
int i=0.j=1;
for(i=1;i<strlen(str);i++)
{
if(str[i]!=str[0])
{
str[j]=str[i];
j=i+1;
}
}
Time Complexity:O(n*n)
public class App {
public static void main(final String args[]) {
char[] input = "Samit Saaaaasan".toCharArray();
removeDuplicates(input);
System.out.println(input);
}
public static void removeDuplicates(char[] str) {
boolean inc = true;
int len = str.length;
for (int i = 1; i < len; ) {
inc = true;
for (int j = 0; j < i; ++j) {
if (str[i] == str[j]) {
str[i] = str[len - 1];
str[len - 1] = 0;
--len;
inc = false;
break;
}
}
if (inc)
++i;
}
}
}
//Assuming charachter array will contain only alphabets.
public class Char_Arr {
public static void main(String[] args) {
// TODO Auto-generated method stub
boolean[] arr = new boolean[51];
char[] char_arr = new char[]{'A','B','c','d','d','Z'};
for(int i = 0; i < char_arr.length; i ++){
int ascii = (int) char_arr[i] - 65;
arr[ascii] = true;
}
for(int m = 0; m < arr.length ; m++){
if(arr[m]==true){
System.out.println(Character.toChars(m+65));
}
}
}
}
Is this code good?
public void RemoveDuplicates()
{
string str = "abcdabcdabcdaaaaaaaaaaaaaaaaaaaaaaaaaaaadddddddddddddddddddddddddddddddeeeeeeeeeeeeeee";
HashSet<char> hs = new HashSet<char>();
foreach (char c in str)
{
if (hs.Contains(c))
{
continue;
}
else
{
hs.Add(c);
}
}
foreach (var item in hs)
{
Console.Write(item);
}
}
C# not Java
void RemoveDupChars2(){
var chars = new char[100]; // "ABCDEFGABUCXYZFUWR".ToCharArray();
for(int i=0;i<chars.Length;i++){
chars[i] = (char)( new Random(i).Next((int)'A',((int)'Z')+1));
}
PrintArray(chars,chars.Length);
int[] indexes = new int[27];
for(int i=0;i<indexes.Length;i++) indexes[i] = -1;
for(var i=0;i<chars.Length;i++){
int index = (int)chars[i] - (int)'A';
if(indexes[index]>=0) continue;
indexes[index] = i;
}
indexes = indexes.OrderBy(f => f).ToArray();
int putCharAt = 0;
for(var i =0;i<indexes.Length;i++){
if(indexes[i] < 0) continue;
chars[putCharAt++]=chars[indexes[i]];
}
Console.WriteLine();
PrintArray(chars,putCharAt);
}
Invariant: all characters before i have no duplicates and all characters after j are not yet checked and all characters between i and j are duplicates.
Hash is to identify if a char happens more than once (maintain count) and when it is copied set a marked -1 to identify it is already copied and any more insttances of it are duplicates.
int removeDuplicates(char *input, int n) {
int hash[256] = {0};
for (int i=0;i<n;i++) {
hash[input[i]]++;
}
int i=0;
int j=0;
while (j<n) {
if (hash[input[j]] != -1) {
hash[input[j]] = -1;
swap(input,i,j);
i++;
j++;
} else
j++;
}
input[i] = '\0';
return i;
}
void Test_removeDuplicates() {
//char input[] = {'a','p','p','l','e','e','s'};
char input[1024];
strcpy(input,"ABCEFGABCXYZ");
std::cout<<input<<"\n";
int s = sizeof(input)/sizeof(char);
removeDuplicates(input,s);
std::cout<<input<<"\n";
}
If only 255 characters are allowed, I'd like to implement it like this. In C++, vector<bool> is implemented using bitmask. So, additional space will be 32 bytes + vector overhead. I think there is similar data structure in Java.
void remove_dup(string& str) {
if (str.length() > 0) {
vector<bool> exist(256, false);
int next = 0; // next position to move a unique char
for (int i = 0; i < str.length(); ++i) {
if (!exist[str[i]]) {
exist[str[i]] = true;
// in-place move
str[next++] = str[i];
}
}
str.resize(next);
}
}
if time complexity is not very important, how about using a variant of insertion sort. Please find the code below.
public static void main(String[] args) {
char[] input = new String("samit sasan").toCharArray();
int uniqueIdx = 0;
for(int i=1;i<input.length;i++)
{
boolean isFound = false;
for(int j=0;j<=uniqueIdx;j++)
{
if(input[j] == input[i]){
isFound =true;
break;
}
}
if(!isFound)
{
uniqueIdx++;
char temp = input[uniqueIdx];
input[uniqueIdx] = input[i];
input[i] = temp;
}
}
for(int k=0;k<=uniqueIdx;k++)
{
System.out.println(input[k]);
}
}
This code copies the non-repeated characters to the start of the original array. O(n) time and space complexity.
public char[] removeDuplicates(char[] data) {
int tail = 0;
for(int i = 0; i < data.length; i++) {
int j = 0;
for(j = 0; j < tail; j++) {
if(data[i] == data[j])
break;
}
if(j == tail) {
data[tail++] = data[i];
}
}
return Arrays.copyOfRange(data, 0, tail);
}
O(1) space complexity, O(n^2) time comlexity;
void removeDup(char array[], int &size){
for (int a = 0; a < size;a++){
char chr = array[a];
bool found = 0;
for (int b = a; b < size;b++){
if (found && array[b] == chr){
for (int c = b; c < size-1; array[c] = array[c+1], c++);
array[size-1] = '/0';
size--;
b--;
}
else if (array[b] == chr)
found = 1;
else continue;
}
}
}
public static String removeChars(String str,String remove){
char[] s = str.toCharArray();
char[] r = remove.toCharArray();
int src,dest=0;
boolean[] flags = new boolean[128];
for(src=0;src<r.length;++src){
flag[r[src]]=true;
}
for(src=0;src<s.length;++src){
if(!flag[src]){
s[dst++]=s[src];
}
}
return new String(s,0,dst);
}
public static char[] RemoveDuplicates(char[] array)
{
var swapped = true;
var totalLength = array.Length;
while (swapped)
{
swapped = false;
for (int i = 1; i < totalLength; i++)
{
if (array[i - 1] == array[i])
array[i] = '\u0000';
if (array[i - 1] < array[i])
{
array[i - 1] ^= array[i];
array[i] ^= array[i - 1];
array[i - 1] ^= array[i];
swapped = true;
}
}
}
return array;
}
O(n^2) time complexity and O(1) space complexity!
public static void main(String[] args) {
// TODO Auto-generated method stub
String input = new String("abcadbedaeasfeadegeadc sfews dsjj");
char inputArray[] = input.toCharArray();
int len = inputArray.length;
for(int i=0;i<len-1;i++){
for(int j=i+1;j<len;j++){
if(inputArray[i] == inputArray[j]){
inputArray[j] = inputArray[len-1];
len--;
j--;
}
}
}
for(int i=0;i<len;i++){
System.out.print(inputArray[i]);
}
}
{
char[] arr = new char[] { 'a', 'c', 'c', 'a', 'b', 'a' };
List<char> li = new List<char>();
Dictionary<char, char> dict = new Dictionary<char, char>();
for (int i = 0; i < arr.Length; i++)
{
if(!li.Contains(arr[i]))
{
li.Add(arr[i]);
}
}
Console.WriteLine(li.Count);
Console.ReadLine();
}
{
char[] arr = new char[] { 'a', 'c', 'c', 'a', 'b', 'a' };
List<char> li = new List<char>();
Dictionary<char, char> dict = new Dictionary<char, char>();
for (int i = 0; i < arr.Length; i++)
{
if(!li.Contains(arr[i]))
{
li.Add(arr[i]);
}
}
Console.WriteLine(li.Count);
Console.ReadLine();
}
Here is my code in C#
public static void RemoveDupChars(ref char[] str)
{
bool[] found = new bool[256];
int tailIndex = 0;
for (int index = 0; index < str.GetUpperBound(0); index++)
{
if (found[str[index]])
{
continue;
}
found[str[index]] = true;
if (index > tailIndex)
{
str[tailIndex] = str[index];
}
tailIndex++;
}
Array.Resize(ref str, tailIndex);
}
As the array is very large so we can assume that array has more space than range of its charset i.e [a-z] or [A-Z] or both
1) for only printing the non-repeating elements -
/*for simplicity consider only chars in set [A-Z]*/
void print(char in[],int n)
{
for(int i=0;i<n;i++) {
int pos = (abs(in[i])-'A')%26;
if(in[pos] > 0 ) {
cout<<(char)(abs(in[i]))<<" ";
in[pos] *= -1;
}
}
}
2) For actually removing duplicates from array and return array with non-duplicate elements -
Can somebody point out how can we do in place replacement to remove duplicates?
If we assume char only from 'a' to 'z' then we can achieve it with only one int variable using bit shifts.
int checker = 0, asciValue;
int len = abc.length;
for (int i = 0; i < len; i++)
{
asciValue = (int) abc[i];
if ((checker & (1 << asciValue)) > 0)
{
for (int j = i; j < len - 1; j++)
{
abc[j] = abc[j + 1];
}
abc[len - 1] = '\u0000';
len -= 1;
i--;
} else
{
checker |= (1 << asciValue);
}
}
System.out.println(abc);
}
char* removeDuplicate(char* str, char c) {
// copy str[j] into str[i]
int i=1,j=1;
// empty string should not be processed
if(!str[0]) return str;
while(str[i]) {
/** str[j] is the character intended to
be copied to str[i], unless str[j] and
str[i-1] are both equal to c (duplicate).
In case of a duplicate, just increment j
without incrementing i. **/
if(str[j] == c && str[i-1] == c) {
j++;
continue; }
if(j != i) str[i] = str[j];
i++;
j++;
}
return str; }
char* removeDuplicate(char* str, char c) {
// copy str[j] into str[i]
int i=1,j=1;
// empty string should not be processed
if(!str[0]) return str;
while(str[i]) {
/** str[j] is the character intended to
be copied to str[i], unless str[j] and
str[i-1] are both equal to c (duplicate).
In case of a duplicate, just increment j
without incrementing i. **/
if(str[j] == c && str[i-1] == c) {
j++;
continue; }
if(j != i) str[i] = str[j];
i++;
j++;
}
return str; }
static void Main(string[] args)
{
string value = "abcdabcdabcdaaaaaaaaaaaaaaaaaaaaaaaaaaaadddddddddddddddddddddddddddddddeeeeeeeeeeeeeee";
char[] array = value.ToCharArray();
string valuefinal = "";
Array.Sort(array);
int temp = 0;
valuefinal = array[0].ToString();
for (int i = 1; i < array.Length; i++)
{
if (array[i].ToString() == array[i-1].ToString())
{
continue;
}
else
{
temp = i;
valuefinal += array[i];
}
i = temp;
}
Console.Write(valuefinal);
Console.Read();
}
Output : abcde
O(n) complexity. Uses extra boolean array.
- GK October 25, 2014