Microsoft Interview Question
Senior Software Development EngineersCountry: United States
Interview Type: In-Person
def match_advance(ri, regexp, si, string)
rend = ri == regexp.length
send = si == string.length
if rend && send then return true end
if rend && !send then return false end
rc = regexp[ri].chr
if rc == "*" then
return match_advance(ri+1, regexp, si, string) || # assume regexp wasn't matching
match_advance(ri+1, regexp, si+1, string) # assume regexp did match
else
if send then return false end
sc = string[si].chr
if rc != sc then return false
else return match_advance(ri+1, regexp, si+1, string) end
end
end
def reg(regexp, string)
return match_advance(0, regexp, 0, string)
end
puts reg("a*b", "acb")
puts reg("abc*", "abbc")
puts reg("**bc", "bc")
puts reg("ba**", "badfg")
puts reg("ba**", "badfg")
puts reg("abc", "abcdef")
puts reg("abcdef", "abc")
puts reg("", "")
public static boolean matches(String str, String pat) {
int strLen = str.length();
int patLen = pat.length();
int pati = 0, stri = 0;
while (pati < patLen || stri < strLen) {
if (pati + 1 < patLen && pat.charAt(pati + 1) == '*') {
while (stri < strLen && str.charAt(stri) == pat.charAt(pati))
stri++;
pati += 2;
} else if (stri < strLen && pati < patLen
&& str.charAt(stri) == pat.charAt(pati)) {
stri++;
pati++;
} else
return false;
}
return true;
}
Here is an iterative solution. Complexity O(n)
Algorithm:
1. If the pattern starts with '*', start matching from the back until you hit the '*' and return result
2. If the pattern ends with '*' start matching from the beginning until you hit the * and return result
3. Perform 1 and 2 simultaneously and return result.
package stringalgos;
public class MyRegex {
public static boolean matchPattern(char[] pattern, char[] input) {
if (pattern.length == 0 || input.length == 0) {
return false;
}
if (pattern[0] == '*') {
return matchFromBack(pattern, input);
} else if (pattern[pattern.length - 1] == '*') {
return matchFromBeginning(pattern, input);
} else {
return (matchFromBeginning(pattern, input) && matchFromBack(
pattern, input));
}
}
public static boolean matchFromBeginning(char[] pattern, char[] input) {
boolean result = true;
int i, j;
for (i = 0, j = 0; i < input.length && j < pattern.length; i++, j++) {
if (pattern[j] == '*') {
return result;
} else if (pattern[j] != input[i]) {
result = false;
return result;
}
}
// input string did not finish
if (i != input.length) {
result = false;
}
return result;
}
public static boolean matchFromBack(char[] pattern, char[] input) {
boolean result = true;
int i, j;
for (i = input.length - 1, j = pattern.length - 1; i >= 0 && j >= 0; i--, j--) {
if (pattern[j] == '*') {
return result;
} else if (pattern[j] != input[i]) {
result = false;
return result;
}
}
// input string did not finish
if (i >= 0) {
result = false;
}
return result;
}
public static void main(String[] args) {
String pattern = "a*b";
String input = "acb";
System.out.println(MyRegex.matchPattern(pattern.toCharArray(),
input.toCharArray()));
pattern = "abc*";
input = "abbc";
System.out.println(MyRegex.matchPattern(pattern.toCharArray(),
input.toCharArray()));
pattern = "**bc";
input = "bc";
System.out.println(MyRegex.matchPattern(pattern.toCharArray(),
input.toCharArray()));
}
}
This solution does not handle multiple stars in non-neighbouring positions, e.g. pattern = "*a*a*" and input = "bbabbabb"
//1 is for true
//0 is for false
int regrex(char *s1, char *s2)
{
int flag = 0;
while(*s1 != '\0' && *s2 != '\0')
{
if(*s1 == '*')
{
s1++;
flag=1;
}
else
{
if( *s1 != *s2 )
{
if(flag)
s2++;
else
return 0;
}
else
{
flag=0;
s1++;
s2++;
}
}
}
if(*s2 != '\0')
return 0;
while(*s1 != '\0')
if( *s1 != '*')
return 0;
return 1;
}
Dynamic Programming. If n = len(regex) and m = len(word) have time = O(n * m) and space O(m)
def walk(word, i):
yield i
a = word[i]
while i < len(word) and word[i] == a:
i += 1
yield i
def matches(regex, word):
one = [False] * (len(word) + 1)
two = [False] * (len(word) + 1)
one[-1] = True
for i in range(len(regex) - 1, -1, -1):
for k in range(len(one)):
two[k] = one[k]
one[k] = False
for j in range(len(word) - 1, -1, -1):
if regex[i] == word[j]:
one[j] = two[j + 1]
elif regex[i] == '*':
one[j] = any(two[j] for j in walk(word, j))
return one[0]
I miss-read the question when coming up with this solution. This solution supports an arbitrary number of stars in any position, connected or not. It also has an added restriction that each * has to match one or more of the same character, so * can be a or aa or aaa or bbbbbb but not ab.
class Program
{
static void Main(string[] args)
{
char[] pat0 = new char[] { '*', 'a', 'b', 'c', 'd' };
char[] iTrue0 = new char[] { 'r', 't', 'a', 'b', 'c', 'd' };
Console.WriteLine(ValidatePatten(pat0, iTrue0));
char[] iFalse0 = new char[] { 'r', 't', 'a', 'c', 'c', 'd' };
Console.WriteLine(ValidatePatten(pat0, iFalse0));
char[] pat1 = new char[] { 'a', 'b', 'c', '*', 'd' };
char[] iTrue1 = new char[] { 'a', 'b', 'c', 'r', 't', 'd' };
Console.WriteLine(ValidatePatten(pat1, iTrue1));
char[] iFalse1 = new char[] { 'a', 'b', 'c', 'r', 't', 'd', 'e' };
Console.WriteLine(ValidatePatten(pat1, iFalse1));
char[] pat2 = new char[] { 'a', 'b', 'c', 'd', '*' };
char[] iTrue2 = new char[] { 'a', 'b', 'c', 'd', 'r', 't' };
Console.WriteLine(ValidatePatten(pat2, iTrue2));
char[] iFalse2 = new char[] { 'a', 'b', 'c', 'r', 't', 'd' };
Console.WriteLine(ValidatePatten(pat2, iFalse2));
}
static bool ValidatePatten(char[] pattern, char[] input)
{
int pLen = 0;
int iLen = 0;
bool result = false;
while (pLen < pattern.Length && iLen < input.Length)
{
if (pattern[pLen] == input[iLen])
{
pLen++;
iLen++;
}
else if (pattern[pLen] == '*')
{
pLen++;
//if '*' is at the end, there is no need to do the matching if previous all matched.
if (pLen == pattern.Length) iLen = input.Length;
}
else if (pattern[pLen] != input[iLen])
{
iLen++;
}
}
if (pLen == pattern.Length && iLen == input.Length)
result = true;
return result;
}
}
Brute force for O(n2) complexity
bool regex(string str1,string str2)
{
int j;
for(int i=0;i<str1.length();i++)
{
for(j=0;j<str2.length();j++)
{
if(i+j>=str1.length())
return false;
if(str1[i+j]!=str2[j]&&str2[j]!='*')
break;
}
if(j>=str2.size())
return true;
}
}
C# implementation
namespace SimpleRegex
{
class Program
{
static bool MatchesInDirection(string text, string pattern, int direction)
{
// Scan over pattern in specified direction
int currentTextPosition = (direction < 0) ? text.Length - 1 : 0;
for (int currentPatternPosition = (direction < 0) ? pattern.Length - 1 : 0;
currentPatternPosition >= 0 && currentPatternPosition < pattern.Length;
currentPatternPosition += direction)
{
if (currentTextPosition == -1 || currentTextPosition == text.Length)
{
return false;
}
char currentPatternCharacter = pattern[currentPatternPosition];
char currentTextCharacter = text[currentTextPosition];
if (currentPatternCharacter == '*')
{
// Skip over current wildcard character
while (currentTextPosition >= 0 && currentTextPosition < text.Length &&
text[currentTextPosition] == currentTextCharacter)
{
currentTextPosition += direction;
}
}
else
{
// Check match
if (currentPatternCharacter == currentTextCharacter)
{
currentTextPosition += direction;
}
else
{
return false;
}
}
}
return true;
}
static bool Matches(string text, string pattern)
{
// Error checking
if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern) || text.Length == 0 || pattern.Length == 0)
{
return false;
}
// Match pattern forwards and backwards
return MatchesInDirection(text, pattern, 1) || MatchesInDirection(text, pattern, -1);
}
static void Main(string[] args)
{
System.Console.WriteLine(Matches("acb", "a*b")); //// true - * matches c
System.Console.WriteLine(Matches("abbc", "abc*")); //// false - no match for second b
System.Console.WriteLine(Matches("**bc", "bc")); //// true - *'s match nothing
System.Console.WriteLine(Matches("bbabbabb", "*a*a*")); //// true - *'s match b's
System.Console.WriteLine(Matches("bc", "abc")); //// false - no match for a
System.Console.ReadLine();
}
}
}
bool bIsStringInPattern(PTCHAR pStr, PTCHAR pPattern)
{
if(NULL == pStr || NULL == pPattern || 0 == iStrLen || 0 == iPLen)
{
return false;
}
int iStrLen = _tcslen(pStr);
int iPLen = _tcslen(pPattern);
bool bRet = true;
int i=j=0;
while(i < iStrLen && j < iPLen)
{
if(pPattern[j] != pStr[i] && pPattern[j] != '*')
{
bRet = false;
break;
}
if(pPattern[j] == '*')
{
if((j + 1) < iPLen && pPattern[j+1] == '*')
{
j++;
continue;
}
if((j + 1) < iPLen && pPattern[j + 1] != pStr[i])
{
i++;
continue;
}
}
j++;
i++;
}
// (acb, acb***c, )
if(bRet && i >= iStrLen && j < iPLen)
{
//good case is (acb, acb*******)
while(j < iPLen)
{
if(pPattern[j] != '*')
{
bRet = false;
break;
}
j++;
}
}
// what about (acbsrcs, a*b) or (acbsrcs, a*) or (acbsrcs, a****)
if(bRet && i < iStrLen && j >= iPLen)
{
if(pPattern[j-1] != '*')
{
bRet = false;
break;
}
}
return bRet;
}
My solution:
bool IsMatch(char *p, char *s)
{
bool fMatch = false;
if (p && s)
{
switch (*p)
{
case '*':
if (!*(p + 1))
{
fMatch = true;
}
else
{
if (*(p + 1) == '*')
{
fMatch = IsMatch(p + 1, s);
}
else
{
while (*s)
{
if (IsMatch(p + 1, s))
{
fMatch = IsMatch(p + 2, s + 1);
break;
}
s++;
}
}
}
break;
default:
if (*s == *p)
{
fMatch = (*s) ? IsMatch(p + 1, s + 1) : true;
}
else
{
fMatch = false;
}
break;
}
}
return fMatch;
}
bool match(char *first, char * second)
{
// If we reach at the end of both strings, we are done
if (*first == '\0' && *second == '\0')
return true;
// Make sure that the characters after '*' are present in second string.
// This function assumes that the first string will not contain two
// consecutive '*'
if (*first == '*' && *(first+1) != '\0' && *second == '\0')
return false;
// If the first string contains '?', or current characters of both
// strings match
if (*first == '?' || *first == *second)
return match(first+1, second+1);
// If there is *, then there are two possibilities
// a) We consider current character of second string
// b) We ignore current character of second string.
if (*first == '*')
return match(first+1, second) || match(first, second+1);
return false;
}
#include<iostream.h>
#include<conio.h>
#include<string.h>
main()
{
char pat[10], input[10];
int i, found=1;
clrscr();
cout<<"enter pattern: ";
cin>>pat;
cout<<"enter input: ";
cin>>input;
for(i=0;i<strlen(pat);i++)
{
if(pat[i]!=input[i]&&pat[i]!='*')
{
found=0;
break;
}
}
if(found==1)
cout<<"true";
if(found==0)
cout<<"false";
getch();
}
Java code:
- Dhass April 21, 2013