You are given with a large par

Amazon Interview Question for SDE1s

0

of 0 votes

8
Answers
You are given with a large paragraph and N words.
You have to find a min length subparagraph of the paragraph which contain all those N words in any order. Here length of a paragraph is the count of words in the paragraph.
- avi007 January 09, 2018 in India | Report Duplicate | Flag | PURGE
Amazon SDE1

Email me when people comment.

An error occurred in subscribing you.

Country: India

Email me when people comment.

An error occurred in subscribing you.

Comment hidden because of low score. Click to expand.

of 0 vote

Assuming that everything fits into memory and we don't need an iterator I would use a sliding window approach. O(m) space and O(n) time while m represents the words of the lookup paragraph words and n the number of characters in the entire paragraph.

public int shortestDistance(@Nonnull final String paragraph, @Nonnull final String lookupParagraph) {
        if (lookupParagraph.length() == 0) {
            throw new IllegalArgumentException();
        }

        String[] lookupWords = lookupParagraph.split(" ");
        Map<String, Integer> foundWordCount = new HashMap<>();
        Set<String> searchWords = new HashSet<>();
        initialize(lookupWords, foundWordCount, searchWords);

        int startSlidingWindow = 0;
        int currentWordCount = 0;
        int minWordCount = Integer.MAX_VALUE;
        StringBuilder currentWord = new StringBuilder();
        for (int i = 0; i < paragraph.length(); ++i) {
            char c = paragraph.charAt(i);

            if (isInWord(c)) {
                currentWord.append(c);
            } else {
                String word = currentWord.toString();
                currentWord = new StringBuilder();
                // test the current word
                if (word.length() > 0) {
                    currentWordCount++;
                    // found the word, remove it from the search words and increment the existence count for the
                    // sliding window
                    if (foundWordCount.containsKey(word)) {
                        foundWordCount.put(word, foundWordCount.get(word) + 1);
                        if (searchWords.contains(word)) {
                            searchWords.remove(word);
                        }
                    }

                    if (searchWords.size() == 0) {
                        for (int j = startSlidingWindow; j < i; ++j) {
                            char c2 = paragraph.charAt(j);
                            if (isInWord(c2)) {
                                currentWord.append(c2);
                            } else {
                                if (currentWord.length() > 0) {
                                    String tmpWord = currentWord.toString();
                                    if (foundWordCount.containsKey(tmpWord)) {
                                        if (foundWordCount.getOrDefault(tmpWord, 0) > 1) {
                                            foundWordCount.put(tmpWord, foundWordCount.get(tmpWord) - 1);
                                        }
                                        // stop sliding
                                        else {
                                            break;
                                        }
                                    }
                                    startSlidingWindow = j;
                                    currentWordCount--;
                                }
                                currentWord = new StringBuilder();
                            }
                        }
                        minWordCount = Math.min(currentWordCount, minWordCount);
                    }
                }
                currentWord = new StringBuilder();
            }
        }
        return minWordCount;
    }


    private void initialize(final String[] lookupWords, final Map<String, Integer> foundWordCount, final Set<String>
            searchWords) {
        for (String lookupWord : lookupWords) {
            searchWords.add(lookupWord);
            foundWordCount.put(lookupWord, 0);
        }
    }


    private boolean isInWord(final char c) {
        return (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
    }

- Scavi January 09, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

Suppose N is less than large paragraph length M. O(M) time is required.

#include <unordered_map>
#include <string>
#include <deque>
#include <iostream>

class Slider {
 public:
  Slider(const std::deque<std::string>& para, const std::deque<std::string>& words) : para_(para) {
    if (para.size() < words.size())
      return;
    for (auto& w : words)
      words_[w]++;
    words_left_ = words.size();
    min_para_ = std::make_pair(0, -1);
  }

  bool next_word() {
    if (!words_left_) {
      next_site();
    }
    if (idx_ >= para_.size())
      return false;
    auto w = para_[idx_++];
    auto it = words_.find(w);
    if (it == words_.end()) {
      if (length_ == 0) {
        // if slider is empty then start with the next word; this condition can be removed.
        ++site_;
        return true;
      } else {
        ++length_;
      }
    } else {
      ++length_;
      // negative is ok, but if positive then less words left to find
      if ((--it->second) >= 0) {
        --words_left_;
      }
    }
    return true;
  }

  int site() const {
    return min_para_.first;
  }

  int length() const {
    return min_para_.second;
  }
 private:
  void next_site()
  {
    while (!words_left_) {
      // if no words to find left then check if new subparagraph is less than prev one
      if (min_para_.second == -1 || min_para_.second > length_) {
        min_para_.first = site_;
        min_para_.second = length_;
      }
      // try move slider right
      --length_;
      auto w = para_[site_++];
      auto it = words_.find(w);
      if (it != words_.end() && (++it->second) > 0) {
        ++words_left_;
      }
    }
  }

  const std::deque<std::string> &para_;
  std::unordered_map<std::string, int> words_; // words and word counts
  int words_left_;  // number of words left to find
  int site_ = 0;    // slider start
  int length_ = 0;  // slider length
  int idx_ = 0;     // next word following slider
  std::pair<int, int> min_para_;  // subparagraph idx and length
};

int main() {
  std::deque<std::string> para = {"alpha", "beta", "gamma", "beta", "aba", "beta", "zeta", "beta", "zen", "cat", "car", "beta", "aba", "beta", "broad"};
  std::deque<std::string> words = {"aba", "beta", "cat", "beta"};

  Slider slider(para, words);
  while (slider.next_word())
    ;
  if (slider.length() > 0)
    std::cout << "Min subparagraph length is " << slider.length() << std::endl;
}

- S.M. January 09, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

O(N) solution taking constant space:fill a counting bloom filter with the needed value.
Create a window of M elements, fill another counting bloom filter with that, move the window by one word, and test if the new bloom filter is equivalent to the one needed. Continue till a match is found or end of word list. If match is found, check if the match is correct by using a hashmap equivalence which is O(M). So worst case, O(MN), best case is O(N). Minor optimisations, if a word is not in the list, then move the window to after that word.

- Steffy January 13, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

In Python, Following code works. I am sure set can be done in Java aswell

def unique_words(string):
  words = string.split()
  print set(words)

unique_words("this string has repetation words, string has and this")

- Maddy January 28, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

O(n) operation.
The soluton is not different from folks commented above. We will implement a sliding window. The window contains all required words, and start sliding from 0 to end of string.

public int findMinParagraph(String doc, Set<String> reqWords) {
		int ii =0, jj=0;
		int wcount = 0;
		String curW = null;
		int minLen = Integer.MAX_VALUE;
		char[] docChars = doc.toCharArray();
		Set<String> reqSet = new HashSet<String>(reqWords);
		Map<String, Integer> countMap = new HashMap<String, Integer>();

		for (String ww : reqWords) {
			countMap.put(ww, 0);
		}

		for (jj = 0; jj < doc.length(); jj++) {
			if (!reqSet.isEmpty()) {
				curW = getWord(docChars, jj);
				if (curW == null) {
					continue;
				}
				// not null from here
				if (reqSet.contains(curW)) {
					reqSet.remove(curW);

				}
				if (countMap.containsKey(curW)) {
					countMap.put(curW, countMap.get(curW) + 1);
				}
				wcount++;
			}
			jj = jj + curW.length();

			if (reqSet.isEmpty()) {
				for (; ii < doc.length() && ii <= jj; ii++) {
					curW = getWord(docChars, ii);
					if (curW == null) {
						continue;
					}

					if (!countMap.containsKey(curW)) {
						wcount--;
					}
					else {
						int curCount = countMap.get(curW);
						countMap.put(curW, --curCount);
						if (curCount > 0) {
							wcount--;
						}
						else {
							// add this back for next walk
							reqSet.add(curW);
							break;
						}
					}
					ii = ii + curW.length();
				}

				// found the lower bound
				minLen = Math.min(minLen, wcount);

				// move up ii for next walk
				ii = ii + curW.length();
				wcount--;
			}
		}
		return minLen;
	}



	public String getWord(char[] docChars,int startIdx) {
		StringBuffer buf = new StringBuffer();
		while (startIdx < docChars.length) {
			if ((docChars[startIdx] >= 'a' && docChars[startIdx] <= 'z') ||
					(docChars[startIdx] >= 'A' && docChars[startIdx] <= 'Z')) {
				buf.append(docChars[startIdx]);
			}
			else {
				break;
			}
			startIdx++;
		}

		return (buf.length() == 0) ? null : buf.toString();
	}

- Nick N. January 29, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

from collections import defaultdict
from itertools import product

def next_word(paragraph, words):
    idx = 0
    for w in paragraph.split():
        if w in words:
            yield idx, w
        idx += len(w) + 1
    
def find_min_subparagraph_length(paragraph, words):
    word_starts = defaultdict(list)
    word_lengths = {}
    for idx, word in next_word(paragraph, set(words)):
        word_starts[word].append(idx)
        word_lengths[idx] = len(word)
    return min(max(p)+word_lengths[max(p)]-min(p) \
               for p in product(*word_starts.values()))

- Anonymous February 01, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

from collections import defaultdict
from itertools import product

def next_word(paragraph, words):
    idx = 0
    for w in paragraph.split():
        if w in words:
            yield idx, w
        idx += len(w) + 1
    
def find_min_subparagraph_length(paragraph, words):
    word_starts = defaultdict(list)
    word_lengths = {}
    for idx, word in next_word(paragraph, set(words)):
        word_starts[word].append(idx)
        word_lengths[idx] = len(word)
    return min(max(p)+word_lengths[max(p)]-min(p) \
               for p in product(*word_starts.values()))

- Isaac February 01, 2018 | Flag Reply

Comment hidden because of low score. Click to expand.

of 0 vote

public static String findParagraph(String[] paraArray, String[] wordsArray) {
    
        int plength  = paraArray.length;
	int wlength = wordsArray.length;

	if(plength < wlength ) return null;
            
	HashMap<String, Integer> hmap = new HashMap<String, Integer>();

	for(int i=0;i<wlength;i++){
                if(hmap.containsKey(wordsArray[i])) {
			hmap.put(wordsArray[i], hmap.get(wordsArray[i]) + 1);
		}else {
			hmap.put(wordsArray[i],1);
		}
	}
        StringBuffer sb = new StringBuffer();
	for(int i = 0 ;i<plength;i++){
                if(!hmap.isEmpty()) {
                    System.out.println(" paraArray[i] --> "+paraArray[i]);
                    sb.append(paraArray[i] + " ");
                    if(hmap.containsKey(paraArray[i])) {
                        hmap.remove(paraArray[i]); 
                        System.out.println(" paraArray[i] ***--> "+paraArray[i]);
                    }
                }else{
                    
                    break;
                }
	}
        System.out.println("SB --> " + sb.toString().trim());
        return sb.toString().trim();
    }
I/P 
====
String[] paraArray = {"alpha", "beta", "gamma", "beta", "aba", "beta", "zeta", "beta", "zen", "cat", "car", "beta", "aba", "beta", "broad"};

String[] wordsArray = {"aba", "beta", "cat", "beta"};
        
O/P 
====
findParagraph--> alpha beta gamma beta aba beta zeta beta zen cat

- silvimasss April 03, 2018 | Flag Reply

CareerCup

Amazon Interview Question for SDE1s

Books

Videos

Resume Review

Mock Interviews