Facebook Interview Question
Software Engineer / DevelopersI would recommend a recursive procedure. Here is the psudo-code. Let me know if you would like proper Java code, and I'll see if I can work on it.
We'll treat taking substrings as a constant time operation. While this is not completely true (in Java), it simplifies the code. If you like, you can first transform the string into an array of characters to guarantee constant time. We'll use a recursive procedure.
boolean match(input, pattern){
if(input empty but pattern is not)
return false;
else if(input empty and pattern empty)
return true;
else if(pattern[0] equals '.')
return match(input[1:], output[1:]);
else if(pattern[0] equals '*') {
if(input[0] equal pattern[1])
return match(input[1:], pattern[2:]);
} else
return match(input[1:], pattern);
} else if (pattern[0] equals input[0]){
return match(input[1:], pattern[1:]);
else
return false;
}
ive chosen to use python style indexing to represent taking substrings. [x:] means from the (x+1)th character onwards.
Running time is O(n) where n is proportional to the length of the shorter string.
Here is a solution in groovy. It is based on the pseudo code above , but is more complete , since there are some cases that are not covered by the previous one.
boolean match(String regex,String str){
if (regex == null || str == null)
throw new NullPointerException()
if (regex.empty && str.empty)
return true
if (regex.empty && str.length() > 0 || str.empty)
return false
if (regex.charAt(0) == '.')
return match(regex.substring(1),str.substring(1))
if (regex.charAt(0) == '*')
if (str.charAt(0) == regex.charAt(1))
return match(regex.substring(2),str.substring(1)) || match(regex,str.substring(1))
else
return match(regex,str.substring(1))
return regex.charAt(0) == str.charAt(0) ? match(regex.substring(1),str.substring(1)) : false;
}
Hey .... Ur algo won't work if we have something like
bhopali *ali ==> it will work
bhoppali *ali ==> it won't work!!!
Code:
bool pattern(string inp,string pat)
{
cout<< "String is:" << inp << " and Pattern is:"<<pat<<endl;
if(inp.empty() && !pat.empty())
return false;
else if(pat.at(0) == inp.at(0) )
{
if((pat.length()==1))
return true;
return pattern(inp.substr(1,inp.length()-1),pat.substr(1,pat.length()-1));
}
else if(pat.at(0)=='.')
{
if(pat.length()==1)
return true;
return pattern(inp.substr(1,inp.length()-1),pat.substr(1,pat.length()-1));
}
else if(pat.at(0)=='*')
{
if(pat.length()==1)
return true;
if(inp.at(0) == pat.at(1) && pat.length()>1)
{
return (pattern(inp.substr(1,inp.length()-1),pat.substr(2,pat.length()-1))) ||
(pattern(inp.substr(1,inp.length()-1),pat));
}
else
{
return pattern(inp.substr(1,inp.length()-1),pat);
}
}
else if(pat.at(0) != inp.at(0))
return false;
//cout<<str<<endl;
}
<<<
"
if(inp.at(0) == pat.at(1) && pat.length()>1)
{
return (pattern(inp.substr(1,inp.length()-1),pat.substr(2,pat.length()-1))) ||
(pattern(inp.substr(1,inp.length()-1),pat));
}
"
>>>
Will it work for inp="abcba", pat="a*bc"?
Exact matches and '.' matches can be skipped normally. When a '*' is encountered at index i, check all the suffixes of the input string for a match with pattern[i+1, n]
Not thoroughly tested. Please leave a comment if it fails on any case.
#include<stdio.h>
//Hold size of the input string
int isize = 0;
//Hold the size of the pattern
int psize = 0;
bool matchHelper(char* input, int iindex, char* pattern, int pindex)
{
//If we reached the end of string and pattern return true
if(iindex == isize && pindex == psize) return true;
//If we reached the end of the string, but not the end of pattern
else if(iindex == isize)
{
int i = pindex;
//If only '*'s are remaining in the pattern then its a match
while(i < psize)
{
if(pattern[pindex] != '*') return false;
i++;
}
return true;
}
//If we reached the end of pattern, but not the end of the string, its not a match
else if(pindex == psize) return false;
//Recursive calls
//If chars match, just move ahead in the string and pattern
if(input[iindex] == pattern[pindex]) return true & matchHelper(input, iindex+1, pattern, pindex+1);
//If we find a '.' just move ahead in string and pattern
else if(pattern[pindex] == '.') return true & matchHelper(input, iindex+1, pattern, pindex+1);
//If a '*' is observed, recursively check every suffix of the string beginning at iindex for a match
//If at least one match occurs return true
else if(pattern[pindex] == '*')
{
int i = iindex;
bool b = false;
for(; i <= isize; i++)
{
b = b | matchHelper(input, i, pattern, pindex+1);
}
return b;
}
//If none of the above return false
else return false;
}
//Obtain length of a string
int getLength(char* input)
{
int len = 0;
while(*input)
{
input++;
len++;
}
return len;
}
//Print 1 is it is a match and 0 if the input and pattern do not match
void match(char* input, char* pattern)
{
isize = getLength(input);
psize = getLength(pattern);
bool result = matchHelper(input, 0, pattern, 0);
printf("%d ", result);
}
int main()
{
char input[] = "regular expression";
char pattern[] = ".egu*pr..*o*";
match(input, pattern);
}
public boolean match(String s, String pattern) {
if (s == null || pattern == null) {
throw new IllegalArgumentException();
}
if (pattern.isEmpty()) {
return s.isEmpty();
}
if (s.isEmpty()) {
if (pattern.charAt(0) == '*') {
return match(s, pattern.substring(1));
} else {
return false;
}
}
char s_0 = s.charAt(0), p_0 = pattern.charAt(0);
if (p_0 == '.' || p_0 == s_0 ) {
return match(s.substring(1), pattern.substring(1));
}
if (p_0 == '*') {
if (!match(s.substring(1), pattern)) {
return match(s, pattern.substring(1));
}
return true;
}
return false;
}
This should do it :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication2
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("aaaaab - a*b - {0}", Match("aaaaab",0, "a*b",0));
Console.WriteLine("aaaaabc - a*b - {0}", Match("aaaaabc", 0, "a*b", 0));
Console.WriteLine("ab - a.b - {0}", Match("ab", 0, "a.b", 0));
Console.WriteLine("acb - a.b - {0}", Match("acb", 0, "a.b", 0));
Console.WriteLine("accccccc - a*b - {0}", Match("accccccc", 0, "a*b", 0));
}
static bool Match(string s,int scur, string r, int rcur)
{
if (IsEmpty(r, rcur))
return IsEmpty(s, scur);
if (r[rcur] == '*')
{
if (IsEmpty(s, scur) & GetLength(r, rcur) == 1)
return true;
return MatchStar(s, scur, r, rcur);
}
if (IsEmpty(s, scur)) return false;
if (r[rcur] != s[scur] && r[rcur] != '.') return false;
return Match(s, scur + 1, r, rcur + 1);
}
static bool MatchStar(string s, int scur, string r, int rcur)
{
if (Match(s, scur, r, rcur + 1))
return true;
for(int i=scur +1;i<s.Length;i++)
{
if(Match(s,i,r,rcur))
return true;
}
return false;
}
static int GetLength(string s, int index)
{
return s.Length - index;
}
static bool IsEmpty(string s, int index)
{
return index == s.Length;
}
}
}
Ok , now this should look better:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication2
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("aaaaab - a*b - {0}", Match("aaaaab",0, "a*b",0));
Console.WriteLine("aaaaabc - a*b - {0}", Match("aaaaabc", 0, "a*b", 0));
Console.WriteLine("ab - a.b - {0}", Match("ab", 0, "a.b", 0));
Console.WriteLine("acb - a.b - {0}", Match("acb", 0, "a.b", 0));
Console.WriteLine("accccccc - a*b - {0}", Match("accccccc", 0, "a*b", 0));
}
static bool Match(string s,int scur, string r, int rcur)
{
if (IsEmpty(r, rcur))
return IsEmpty(s, scur);
if (r[rcur] == '*')
{
if (IsEmpty(s, scur) & GetLength(r, rcur) == 1)
return true;
return MatchStar(s, scur, r, rcur);
}
if (IsEmpty(s, scur)) return false;
if (r[rcur] != s[scur] && r[rcur] != '.') return false;
return Match(s, scur + 1, r, rcur + 1);
}
static bool MatchStar(string s, int scur, string r, int rcur)
{
if (Match(s, scur, r, rcur + 1))
return true;
for(int i=scur +1;i<s.Length;i++)
{
if(Match(s,i,r,rcur))
return true;
}
return false;
}
static int GetLength(string s, int index)
{
return s.Length - index;
}
static bool IsEmpty(string s, int index)
{
return index == s.Length;
}
}
}
I revised you code as below. The main changes are:
static public void Test()
{
Console.WriteLine( "aaaaab - a*b - {0}", Match( "aaaaab", 0, "a*b", 0 ) );
Console.WriteLine( "aaaaabc - a*b - {0}", Match( "aaaaabc", 0, "a*b", 0 ) );
Console.WriteLine( "ab - a.b - {0}", Match( "ab", 0, "a.b", 0 ) );
Console.WriteLine( "acb - a.b - {0}", Match( "acb", 0, "a.b", 0 ) );
Console.WriteLine( "acccccccb - a*b - {0}", Match( "acccccccb", 0, "a*b", 0 ) );
Console.WriteLine( "acccccccb - a.*b - {0}", Match( "acccccccb", 0, "a.*b", 0 ) );
Console.WriteLine( "b - a*b - {0}", Match( "b", 0, "a*b", 0 ) );
}
static bool Match( string s, int scur, string r, int rcur )
{
if ( IsEmpty( r, rcur ) )
{
return IsEmpty( s, scur );
}
if ( r[rcur] == '*' )
{
if ( IsEmpty( s, scur ) & GetLength( r, rcur ) == 1 )
{
return true;
}
return MatchStar( s, scur, r, rcur, r[rcur - 1] );
}
if ( IsEmpty( s, scur ) )
{
return false;
}
if ( r[rcur] != s[scur] && r[rcur] != '.' )
{
if ( GetLength( r, rcur ) > 1 && r[rcur + 1] == '*' )
{
return Match( s, scur, r, rcur + 2 );
}
else
{
return false;
}
}
return Match( s, scur + 1, r, rcur + 1 );
}
static bool MatchStar( string s, int scur, string r, int rcur, char c )
{
if ( Match( s, scur, r, rcur + 1 ) )
{
return true;
}
for ( int i = scur + 1; i < s.Length; i++ )
{
if ( IsMatch( s[i - 1], c ) && Match( s, i, r, rcur ) )
{
return true;
}
}
return false;
}
static int GetLength( string s, int index )
{
return s.Length - index;
}
static bool IsEmpty( string s, int index )
{
return index == s.Length;
}
static bool IsMatch( char c1, char c2 )
{
if ( c1 == c2 || c2 == '.' )
{
return true;
}
else
{
return false;
}
}
I have a correct solution to it, and probably the most efficient and standard way to solve it, because my method is from KMP. The time complexity is O(n), same as the classic KMP. I just extended it to support '*' and '.' .
Write in C++. You need to include <string> and <vector> at least to compile it.
Here is the code, which is very similar to classic KMP except for a few changes. I will point them out later.
vector<int> next_ext(const string &str)
{
int leng = str.size();
if (leng < 0 || str.empty())
return vector<int>();
vector<int> next(leng + 1);
int i = -1, j, off; //treated like str[-1] is a '*'
off = i, j = off, i ++, next[i] = off;
while (i < leng)
{
if (str[i] == '*' ) //consistent with the above comment.
off = i, j = off, i ++, next[i] = off;
else if (j == off || str[i] == str[j] || str[j] == '.')
++i, ++j, next[i] = j;
else
j = next[j];
}
return next;
}
//if matched, return the last position of the first matched substring of the string, else return -1.
int kmp_ext(string str, string pattern)
{
int str_len = str.size(), pattern_len = pattern.size();
vector<int> next = next_ext(pattern);
int i = 0, j = -1, off; // treated like pattern[-1] is a '*'
off = j, j ++;
while(i < str_len && j < pattern_len)
{
if (j != off && pattern[j] == '*' )
off = j, j ++; //consistent with the above comment
else if( j == off || str[i] == pattern[j] || pattern[j] == '.')
j++, i++;
else
j = next[j];
}
if(j == pattern_len)
return i;
else
return -1;
}
The kmp_ext() method returns the ending position of the first matched substring of the string or -1 if not found any.
Note: if a pattern ends with a '*', it returns the ending position of the last non-'*' match rather than the length of string. You can modify the algo to let it return length-of-string easily though.
e.g.
int main()
{
string s = "abdcxdefabc";
cout<<kmp_ext(s, "ab..cx")<<endl;
cout<<kmp_ext(s, "ab*x.*fa*c")<<endl;
cout<<kmp_ext(s, "cxd*")<<endl; //Ending with a '*', return the matched position of 'd' rather than the length of s.
cout<<kmp_ext(s, "*b*x.*fa*c")<<endl;
}
it outputs:
-1
11
6
11
KMP is a "find" method, the original question is a "match" problem. Generally speaking, "find" is usually harder than "match".
So, with slight modification, this method can be used to solve the "match" too-- just also return the beginning position of the first matched substring of the string. If the found substring turns out to cover the whole string, it is a match.
The differences from classic KMP of this method are:
1. Add one more condition in 'if' statement indicating pattern[j] == '.' is considered as a vaild match for any character.
2. Add one more 'if' statement to generalize the standard KMP to handle '*'.
3. Introduced an additional variable 'off', whcih is initialized with -1, but changes when a '*' is met. In standard KMP, it is set to be a constant: -1. Here I generalized it to a variable.
These 3 differences occur both in next() and kmp() function, symmetrically.
Except for these 3 differences (in each function), all other part is standard KMP.
I believe some people may be confused with the logic and want me to explain the logic more clearly. But I have to say, it has already been complicated enough for a person to just explain the standard KMP. I am really not able to explain the logic of this extended KMP, simply because the logic is too complicated. If you are interested, and have thoroughly understood standard KMP, it is possible for you to understand the logic just by reading this code.
a recursive version that may be wrong
public boolean compare(byte[] rule, byte[] string, int p_r, int p_s)
{
if(p_s >= string.length)
{
while(p_r<rule.length)
if(rule[p_r++]!= '*')
return false;
return true;
}else if(p_r >= rule.length)
{
return false;
}
if(rule[p_r] == '*')
{
return p_r == rule.length - 1 ? true : compare(rule,string,p_r,p_s + 1) || compare(rule,string,p_r + 1,p_s);
}else if(rule[p_r] == '.' || string[p_s] == rule[p_r])
{
return compare(rule,string,p_r+1,p_s + 1);
}else
{
return false;
}
}
Here is a solution using C#, which solves this without recursion in O(n) time.
The tests are using xunit & xunit.extensions packages:
namespace Tests
{
using Xunit;
public class PatternMatcher
{
public static bool IsMatch(string pattern, string input)
{
int i = 0;
int j = 0;
bool prevIsMatchZeroOrMore = false;
while (i < input.Length)
{
if (IsMatchZeroOrMore(pattern[j]))
{
prevIsMatchZeroOrMore = true;
j++;
}
else
{
if (IsMatchSingle(pattern[j]) || input[i] == pattern[j])
{
i++;
j++;
}
else
{
if (prevIsMatchZeroOrMore)
{
i++;
}
else
{
if (j == 0)
{
i++;
}
else
{
j = 0;
}
}
}
}
//We've reached the end of the pattern and it was matched
if (j >= pattern.Length - 1)
{
return true;
}
}
return false;
}
private static bool IsMatchSingle(char c)
{
return c == '.';
}
private static bool IsMatchZeroOrMore(char c)
{
return c == '*';
}
}
public class UnitTest1
{
[Theory]
[InlineData("ab", "ab", true)]
[InlineData("ab", "ccab", true)]
[InlineData("ab", "aab", true)]
[InlineData("ab", "cd", false)]
[InlineData("*", "abc", true)]
[InlineData("*c", "abc", true)]
[InlineData("a*c", "abbbbbbc", true)]
[InlineData("a*b", "cd", false)]
[InlineData("a*c", "ac", true)]
[InlineData("a*bc", "aba----bc", true)]
[InlineData("*ali", "bhopali", true)]
[InlineData("a*b", "acccbccb", true)]
[InlineData(".", "abc", true)]
[InlineData("a.c", "abc", true)]
[InlineData(".", "", false)]
public void Case1(string pattern, string input, bool expectedIsMatch)
{
Assert.Equal(expectedIsMatch, PatternMatcher.IsMatch(pattern, input));
}
}
}
- Zoidberg May 14, 2012