Google Interview Question
Software Engineer / DevelopersCountry: India
Interview Type: In-Person
Just do it recursively to check for all substrings.
boolean check(String s1, String s2){
if(s2.equals("*")) return true;
if(s1.equals(s2)) return true;
if(s2.charAt(0)=='?'){
return check(s1.substring(1),s2.substring(1))||check(s1,s2.substring(1));}
else if(s2.charAt(0)=='*'){
for(int i=0;i<=s1.length();i++){
if(check(s1.substring(i),s2.substring(1)) retun true;}
return false; }
else if(s1.charAt(0)!=s2.charAt(0)) return false;
return check(s1.substring(1),s2.substring(1));
}
Cool, one minor improvement would be just decreasing the number of recursive calls and getting rid of extra memory, here is my version(not working for a corner cases like "hello","*" but the idea seems to be right:
function match_internal(s1, s2, s1Index, s2Index){
if(s1Index==s1.length && s2Index==s2.length)
return true;
while(s2[s2Index]!='*' && s2[s2Index]!='?' && s1Index < s1.length && s2Index < s2.length){
if(s1[s1Index]!=s2[s2Index])
return false;
s1Index++;
s2Index++;
}
if(s2[s2Index]=='?')
return match_internal(s1, s2, s1Index, s2Index+1) || match_internal(s1,s2,s1Index+1, s2Index+1);
if(s2[s2Index]=='*'){
var zeroMatch = match_internal(s1, s2, s1Index, s2Index+1);
if(zeroMatch)
return true;
var match = false;
for(var i=s1Index+1; i<s1.length;i++){
match = match || match_internal(s1, s2, i, s2Index+1);
}
return match;
}
}
Thanks for your suggestions. Arrays are better in memory, but String also gets better equality comparison. I think your code already handles cases like "hello","*" if you just put the i<=s1.length. Besides you can also deal with it in the for loop of "*". I've modified my code to make it better.
@ zyfo2 I think there is a problem in your code. You have" s1.charAt(0)=='*' ", but the question means that s1 just contains from A to Z.
Tell me how does it work for "Abcde", "Acdef".
All the if conditions will fail, and the control will come to
return check(s1.substring(1),s2.substring(1));
Which will return true (check("A", "A")), which I think is wrong. Correct me if I am wrong.
Also, the * implementation is buggy
I am guessing its basically a question to implement regex matching algorithm.
For e.g if S1 = aabcccd & S2 = a*bc*d?e?. then its a match..
Assuming the S2 string is a proper regular expression then here's my algorithm
bool RegexMatch(char *S1, char* S2, int n1, int n2)
{
int ptr1 = 0; int ptr2 = 0;
while (ptr1 < n1 && ptr2 < n2)
{
if (S1[ptr1] == S2[ptr2])
{
ptr1++; ptr2++;
if (S2[ptr2] == ?)
ptr2++;
if (ptr2 == *)
{
while(S1[ptr1-1] == S1[ptr1] && ptr1 < n1)
ptr1++;
}
}
else if (S2[ptr2+1] == *)
{
ptr2 = ptr2+2;
}
else if (S2[ptr2+1] == ?)
{
ptr2 = ptr2+2;
}
else
{
return false;
}
}
if (ptr1 < n1) return false;
if (pt2 < n2)
{
while(ptr2 < n2)
{
if (S2[ptr2] == * || S2[ptr2] == ?)
ptr2++;
else
{
if (S2[ptr2+1] == ? || S2[ptr2+1] == *)
ptr2 = ptr2 + 2;
else
return false;
}
}
}
return true;
}
Your Algorithm is absolutely correct.
The running code is:
#include<stdio.h>
#include<string.h>
int RegexMatch(char *S1, char* S2, int n1, int n2)
{
int ptr1 = 0; int ptr2 = 0;
while (ptr1 < n1 && ptr2 < n2)
{
if (S1[ptr1] == S2[ptr2])
{
ptr1++; ptr2++;
if (S2[ptr2] == '?')
ptr2++;
if (S2[ptr2] == '*')
{ ptr2++;
while(S1[ptr1-1] == S1[ptr1] && ptr1 < n1)
ptr1++;
}
}
else if (S2[ptr2+1] == '*')
{
ptr2 = ptr2+2;
}
else if (S2[ptr2+1] == '?')
{
ptr2 = ptr2+2;
}
else
{
return 0;
}
}
if (ptr1 < n1) return 0;
if (ptr2 < n2)
{
while(ptr2 < n2)
{
if (S2[ptr2] == '*' || S2[ptr2] == '?')
ptr2++;
else
{
if (S2[ptr2+1] == '?' || S2[ptr2+1] == '*')
ptr2 = ptr2 + 2;
else
return 0;
}
}
}
return 1;
}
int main()
{
char S1[]="aabcccd",S2[]="a*bc*d?e?";
int flag=RegexMatch(S1,S2,strlen(S1),strlen(S2));
if(!flag)
printf("Match Not Found\n");
else
printf("Match Found\n");
getch();
return 0;
}
Thanks Don!!! :)
#include "stdafx.h"
#include<stdio.h>
#include<string.h>
int RegexMatch(char *S1, char* S2, int n1, int n2)
{
int ptr1 = 0; int ptr2 = 0;
while (ptr1 < n1 && ptr2 < n2)
{
if (S1[ptr1] == S2[ptr2])
{
ptr1++; ptr2++;
}
else if (S2[ptr2] == '?')
{
if (S1[ptr1] != S1[ptr1 -1])
{
// skip * ?
while (ptr2 < n2 && (S2[ptr2] == '*' || S2[ptr2] == '?'))
{
++ptr2;
}
}
else
{
ptr1++; ptr2++;
}
}
else if (S2[ptr2] == '*')
{
if (S1[ptr1] != S1[ptr1 -1])
{
// skip * ?
while (ptr2 < n2 && (S2[ptr2] == '*' || S2[ptr2] == '?'))
{
++ptr2;
}
}
else
{
while(S1[ptr1] == S1[ptr1 - 1] && ptr1 < n1)
{
ptr1++;
}
++ptr2;
}
}
else
{
return 0;
}
}
if (ptr1 < n1) return 0;
return 1;
}
int main()
{
char S1[]="aaaabccd",S2[]="aa*bcc?d?e?"; // match
//char S1[]="aaaabcccd",S2[]="aa*bcc?*d?e?"; // match
//char S1[]="abccd",S2[]="aa*bcc?d?e?"; // not match
//char S1[]="aabccfd",S2[]="aa*bcc?d?e?"; // not match
int flag=RegexMatch(S1,S2,strlen(S1),strlen(S2));
if(!flag)
printf("Match Not Found\n");
else
printf("Match Found\n");
return 0;
}
Can you please check if this code works for following example:
s1 = axyzbmnk
s2 =a*m?k
The output should be a match.
The question as phrased is not asking for regex match. If it were regex * would match the preceding character 0 or more times and ? would match the preceding character 0 or 1 times. The question says "any character" which means it is asking for a file name wildcard type of matching in which * fills in for any sequence of 0 or more characters and ? fills in for either no character or any one character. As in sr's comment, axyzbmnk and a*m?k should be a match.
BOOL matche(char *S1, char *S2){
while(s2 && s1){
if(*s2 == *s1){
return matche(++s1, ++s2);
} else {
switch(*s2){
case '*':
while(*s2 == '*') s2++;
while(*s1) {if(match(++s1, s2) == TRUE) return TRUE;}
return FALSE;
break;
case '?':
++s2;
if(match(s1, s2) == TRUE) return TRUE;
if(match(++s1, s2) == TRUE) return TRUE;
break;
default:
return FALSE;
break;
}//switch
}
}//While
if(*s2 == '*' || *s2 == '?') ++s2;
if(*s1 == '\0')
return TRUE;
else return FALSE;
}
This problem can be efficiently solved using the modified dynamic algorithm for finding longest common sub-sequence.
The algorithm should be modified as follows:
Assume S1 is the the top row and S2 is the first column.
Then, the algorithm is allowed to move without penalty:
- diagonally if both are letters
- vertically (downwards) if '*' or '?'
- horizontally: only one step if '?' and many steps if '*'
For any other mismatch, penalize with infinity.
The strings match if there is a solution with cost less than infinity.
The cost of the algorithm is O(mn), m, n being the lenghts of S1 and S2.
The recursive algorithm mentioned here could have exponential time complexity.
adding another iterative solution to the pool
public static void main(String[] args) {
System.out.println(matches("abcdefgh1", "a?d*"));
// System.out.println(matches("axyzbmnk", "a*m?k"));
}
public static boolean matches(String s1, String s2) {
if (s1 == null || "".equals(s1))
return false;
if (s2 == null || "".equals(s2) || "*".equals(s2) || "?".equals(s2)) {
return true;
}
String[] strArr2 = s2.split("\\*");
List<List<String>> starList = new ArrayList<List<String>>();
for (String str : strArr2) {
List<String> qList = Arrays.asList(str.split("\\?"));
starList.add(qList);
}
for (List<String> stars : starList) {
for (int i=0;i<stars.size();i++) {
String str = stars.get(i);
int ind = s1.indexOf(str);
if (ind < 0 || (i > 0 && ind > 1)) {
return false;
}
s1 = s1.substring(ind + str.length());
}
}
return true;
}
bool compS(char* s1,char* s2){
if(*s2=='\0'&&*s1=='\0')return true;
if(*s2=='?'){
return (compS(s1,s2+1)||compS(s1+1,s2+1));
}else if(*s2=='*'){
if(compS(s1,s2+1))return true;
char r=*s1;
if(r=='\0')return true;
while(*s1==r){
if(compS(s1+1,s2+1))return true;
++s1;
}
}else if(*s1==*s2)return compS(s1+1,s2+1);
return false;
}
private static boolean matches(char[] s, char[] p, int i, int j) {
if (j < 0) {
return i < 0;
}
if (i < -1)
return false;
if (i > -1 && s[i] == p[j]) {
return matches(s, p, i - 1, j - 1);
}
if (p[j] == '?') {
return matches(s, p, i, j - 1) || matches(s, p, i - 1, j - 1);
}
if (p[j] == '*') {
return matches(s, p, i, j - 1) || matches(s, p, i - 1, j - 1)
|| matches(s, p, i - 1, j);
}
return false;
}
public static boolean compareAnyStr(String s1, String s2) {
return compareStr(s1.toUpperCase(), s2.toUpperCase());
}
public static boolean compareStr(String s1, String s2) {
if(s1.length() == 0 || s2.length() == 0) {
//if both are empty then return true
if(s1.length() == 0 && s2.length() == 0) {
return true;
}
//if s1 is empty and s2 is of length=1 and contains '*' then return true
if(s1.length() == 0 && s2.length() == 1 && s2.charAt(0) == '*') {
return true;
}
return false;
}
char ch2 = s2.charAt(0);
if(ch2 >= 'A' && ch2 <= 'Z') {
char ch1= s1.charAt(0);
return (ch1 == ch2) && compareStr(s1.substring(1), s2.substring(1));
} else {
if(ch2 == '?') {
//two cases: ? matches 0 character, ? matches 1 character of s1
return compareStr(s1, s2.substring(1)) ||
compareStr(s1.substring(1), s2.substring(1));
} else {
//two cases: * matches 0 character, * matches 1 character but we don't remove * from s2
return compareStr(s1, s2.substring(1)) ||
compareStr(s1.substring(1), s2);
}
}
}
public static void main(String[] args) throws CloneNotSupportedException {
System.out.println(compareAnyStr("abc", "abc"));
System.out.println(compareAnyStr("abc", "?bc"));
System.out.println(compareAnyStr("abc", "*d"));
System.out.println(compareAnyStr("abcde", "?bc*e"));
}
Here's my solution in Java
public class Matches {
// I named s2 as “regex”
public static boolean matches(String s1, String regex) {
// Pointers to the next character that should be examined
int currentS1 = 0;
int currentRegex = 0;
while(currentS1 < s1.length() && currentRegex < regex.length()) {
if(currentRegex + 1 < regex.length() && regex.charAt(currentRegex + 1) == '?') {
// means that regex.charAt(currentRegex) should appear 1 or 0 times
if(s1.charAt(currentS1) == regex.charAt(currentRegex)) {
// advance s1’s pointer
currentS1++;
}
currentRegex +=2; // skip also the ‘?’
} else if(currentRegex + 1 < regex.length() && regex.charAt(currentRegex + 1) == '*') {
while(currentS1 < s1.length() && s1.charAt(currentS1) == regex.charAt(currentRegex)) {
// advance s1’s pointer
currentS1++;
}
currentRegex +=2; // skip also the ‘*’
} else {
if(s1.charAt(currentS1) == regex.charAt(currentRegex)) {
// advance s1’s pointer & regex’s pointer
currentS1++;
currentRegex++;
} else {
return false; // a mismatch was found
}
}
} // while
// reaching here means that at least one of the strings reached the end
if(currentS1 >= s1.length() && currentRegex >= regex.length()) {
return true; // both of the strings ended
} else if(currentS1 >= s1.length() ) { // only the string has ended
if(currentRegex == regex.length() -1)
return false;
currentRegex++;
while(currentRegex < regex.length()) {
if(regex.charAt(currentRegex) == '?' || regex.charAt(currentRegex) == '*'){
// all of the expressions in the regex are either ? or *
//return true;
} else {
return false;
}
currentRegex += 2;
}
return true;
} else {
return false;
}
}
public static void main(String[] args) {
System.out.println("Should be true: " + matches("aab","aab"));
System.out.println("Should be true: " + matches("aab","aab?"));
System.out.println("Should be true: " + matches("aa","aab?"));
System.out.println("Should be true: " + matches("aab","aab*"));
System.out.println("Should be true: " + matches("aa","aab*"));
System.out.println("Should be true: " + matches("aabbbbbbbbbb","aab*"));
System.out.println("Should be true: " + matches("aabbbbbbbbbbc","aab*c"));
System.out.println("Should be true: " + matches("aaaaaaaa","aa?a*"));
System.out.println("Should be true: " + matches("aaabbbbbcccccd","a*b*c*d*"));
System.out.println("Should be true: " + matches("aaabbbbbcccccd","a*b*c*d?"));
System.out.println("Should be false: " + matches("aaabbbbbcccccdd","a*b*c*d?"));
System.out.println("Should be false: " + matches("aaaaaaaa","b"));
System.out.println("Should be false: " + matches("aaaaaaaa","b?"));
System.out.println("Should be false: " + matches("aaaaaaaa","b*"));
System.out.println("Should be false: " + matches("ab","b*"));
System.out.println("Should be false: " + matches("abc","b*c"));
}
}
And here is a simple dumb regex engine in python
def dumb_regex(str1, str2):
#print str1, str2
if str1 == str2:
return True
if str2 == '*':
return True
if len(str1) == 1 and str2 == '?':
return True
if str2[0] == '?':
return dumb_regex(str1[1:], str2[1:]) or dumb_regex(str1, str2[1:])
elif str2[0] == '*':
for i in range(len(str1)):
if dumb_regex(str1[i:], str2[1:]):
return True
else:
return False
elif str1[0] != str2[0]:
return False
return dumb_regex(str1[1:], str2[1:])
if __name__=="__main__":
print dumb_regex('abcd', 'a*b*c?')
print dumb_regex('abcd', 'a?b*c?')
print dumb_regex('xyz', 'a?b*c?')
print dumb_regex('xyzabcd', 'xy*')
print dumb_regex('xyz', 'xy?')
bool isMatch(const char *s, const char *p) {
assert(s && p);
if (*p == '\0') return *s == '\0';
// next char is not '*': must match current character
if (*(p+1) != '*') {
assert(*p != '*');
return ((*p == *s) || (*p == '.' && *s != '\0')) && isMatch(s+1, p+1);
}
// next char is '*'
while ((*p == *s) || (*p == '.' && *s != '\0')) {
if (isMatch(s, p+2)) return true;
s++;
}
return isMatch(s, p+2);
}
- Z0nk3d August 16, 2013