Amazon Interview Question
SDE1sCountry: India
Suppose N is less than large paragraph length M. O(M) time is required.
#include <unordered_map>
#include <string>
#include <deque>
#include <iostream>
class Slider {
public:
Slider(const std::deque<std::string>& para, const std::deque<std::string>& words) : para_(para) {
if (para.size() < words.size())
return;
for (auto& w : words)
words_[w]++;
words_left_ = words.size();
min_para_ = std::make_pair(0, -1);
}
bool next_word() {
if (!words_left_) {
next_site();
}
if (idx_ >= para_.size())
return false;
auto w = para_[idx_++];
auto it = words_.find(w);
if (it == words_.end()) {
if (length_ == 0) {
// if slider is empty then start with the next word; this condition can be removed.
++site_;
return true;
} else {
++length_;
}
} else {
++length_;
// negative is ok, but if positive then less words left to find
if ((--it->second) >= 0) {
--words_left_;
}
}
return true;
}
int site() const {
return min_para_.first;
}
int length() const {
return min_para_.second;
}
private:
void next_site()
{
while (!words_left_) {
// if no words to find left then check if new subparagraph is less than prev one
if (min_para_.second == -1 || min_para_.second > length_) {
min_para_.first = site_;
min_para_.second = length_;
}
// try move slider right
--length_;
auto w = para_[site_++];
auto it = words_.find(w);
if (it != words_.end() && (++it->second) > 0) {
++words_left_;
}
}
}
const std::deque<std::string> ¶_;
std::unordered_map<std::string, int> words_; // words and word counts
int words_left_; // number of words left to find
int site_ = 0; // slider start
int length_ = 0; // slider length
int idx_ = 0; // next word following slider
std::pair<int, int> min_para_; // subparagraph idx and length
};
int main() {
std::deque<std::string> para = {"alpha", "beta", "gamma", "beta", "aba", "beta", "zeta", "beta", "zen", "cat", "car", "beta", "aba", "beta", "broad"};
std::deque<std::string> words = {"aba", "beta", "cat", "beta"};
Slider slider(para, words);
while (slider.next_word())
;
if (slider.length() > 0)
std::cout << "Min subparagraph length is " << slider.length() << std::endl;
}
O(N) solution taking constant space:fill a counting bloom filter with the needed value.
Create a window of M elements, fill another counting bloom filter with that, move the window by one word, and test if the new bloom filter is equivalent to the one needed. Continue till a match is found or end of word list. If match is found, check if the match is correct by using a hashmap equivalence which is O(M). So worst case, O(MN), best case is O(N). Minor optimisations, if a word is not in the list, then move the window to after that word.
O(n) operation.
The soluton is not different from folks commented above. We will implement a sliding window. The window contains all required words, and start sliding from 0 to end of string.
public int findMinParagraph(String doc, Set<String> reqWords) {
int ii =0, jj=0;
int wcount = 0;
String curW = null;
int minLen = Integer.MAX_VALUE;
char[] docChars = doc.toCharArray();
Set<String> reqSet = new HashSet<String>(reqWords);
Map<String, Integer> countMap = new HashMap<String, Integer>();
for (String ww : reqWords) {
countMap.put(ww, 0);
}
for (jj = 0; jj < doc.length(); jj++) {
if (!reqSet.isEmpty()) {
curW = getWord(docChars, jj);
if (curW == null) {
continue;
}
// not null from here
if (reqSet.contains(curW)) {
reqSet.remove(curW);
}
if (countMap.containsKey(curW)) {
countMap.put(curW, countMap.get(curW) + 1);
}
wcount++;
}
jj = jj + curW.length();
if (reqSet.isEmpty()) {
for (; ii < doc.length() && ii <= jj; ii++) {
curW = getWord(docChars, ii);
if (curW == null) {
continue;
}
if (!countMap.containsKey(curW)) {
wcount--;
}
else {
int curCount = countMap.get(curW);
countMap.put(curW, --curCount);
if (curCount > 0) {
wcount--;
}
else {
// add this back for next walk
reqSet.add(curW);
break;
}
}
ii = ii + curW.length();
}
// found the lower bound
minLen = Math.min(minLen, wcount);
// move up ii for next walk
ii = ii + curW.length();
wcount--;
}
}
return minLen;
}
public String getWord(char[] docChars,int startIdx) {
StringBuffer buf = new StringBuffer();
while (startIdx < docChars.length) {
if ((docChars[startIdx] >= 'a' && docChars[startIdx] <= 'z') ||
(docChars[startIdx] >= 'A' && docChars[startIdx] <= 'Z')) {
buf.append(docChars[startIdx]);
}
else {
break;
}
startIdx++;
}
return (buf.length() == 0) ? null : buf.toString();
}
from collections import defaultdict
from itertools import product
def next_word(paragraph, words):
idx = 0
for w in paragraph.split():
if w in words:
yield idx, w
idx += len(w) + 1
def find_min_subparagraph_length(paragraph, words):
word_starts = defaultdict(list)
word_lengths = {}
for idx, word in next_word(paragraph, set(words)):
word_starts[word].append(idx)
word_lengths[idx] = len(word)
return min(max(p)+word_lengths[max(p)]-min(p) \
for p in product(*word_starts.values()))
from collections import defaultdict
from itertools import product
def next_word(paragraph, words):
idx = 0
for w in paragraph.split():
if w in words:
yield idx, w
idx += len(w) + 1
def find_min_subparagraph_length(paragraph, words):
word_starts = defaultdict(list)
word_lengths = {}
for idx, word in next_word(paragraph, set(words)):
word_starts[word].append(idx)
word_lengths[idx] = len(word)
return min(max(p)+word_lengths[max(p)]-min(p) \
for p in product(*word_starts.values()))
public static String findParagraph(String[] paraArray, String[] wordsArray) {
int plength = paraArray.length;
int wlength = wordsArray.length;
if(plength < wlength ) return null;
HashMap<String, Integer> hmap = new HashMap<String, Integer>();
for(int i=0;i<wlength;i++){
if(hmap.containsKey(wordsArray[i])) {
hmap.put(wordsArray[i], hmap.get(wordsArray[i]) + 1);
}else {
hmap.put(wordsArray[i],1);
}
}
StringBuffer sb = new StringBuffer();
for(int i = 0 ;i<plength;i++){
if(!hmap.isEmpty()) {
System.out.println(" paraArray[i] --> "+paraArray[i]);
sb.append(paraArray[i] + " ");
if(hmap.containsKey(paraArray[i])) {
hmap.remove(paraArray[i]);
System.out.println(" paraArray[i] ***--> "+paraArray[i]);
}
}else{
break;
}
}
System.out.println("SB --> " + sb.toString().trim());
return sb.toString().trim();
}
I/P
====
String[] paraArray = {"alpha", "beta", "gamma", "beta", "aba", "beta", "zeta", "beta", "zen", "cat", "car", "beta", "aba", "beta", "broad"};
String[] wordsArray = {"aba", "beta", "cat", "beta"};
O/P
====
findParagraph--> alpha beta gamma beta aba beta zeta beta zen cat
Assuming that everything fits into memory and we don't need an iterator I would use a sliding window approach. O(m) space and O(n) time while m represents the words of the lookup paragraph words and n the number of characters in the entire paragraph.
- Scavi January 09, 2018