Santoso.Wijaya
BAN USERA solution in C++11:
static string _form_string(const map<char, int> &use_index)
{
using content_type = pair<char, int>;
vector<content_type> content;
for (auto it = use_index.begin(); it != use_index.end(); ++it)
{
content.push_back(*it);
}
std::sort(content.begin(), content.end(),
[](const content_type &first, const content_type &second) -> bool
{
assert (std::get<1>(first) != std::get<1>(second));
return std::get<1>(first) < std::get<1>(second);
});
string result;
for (auto &i : content)
{
result += std::get<0>(i);
}
return result;
}
string delete_dupes(const string &input)
{
map<char, int> use_index; // maps a character in the string to its use index
// e.g. if "abc" is formed from "bcabc", then
// 'a':2, 'b':3, 'c':4
string result;
// first pass: greedily select the last character found (discarding earlier dupes)
for (int i = 0; i < (int)input.size(); ++i)
{
char c = input[i];
auto found = result.find(c);
if (found != string::npos)
{
// a new duplicate character found; erase our usage of its earlier occurence
result.erase(found, 1);
}
result += c;
use_index[c] = i;
}
// at this point, we have a candidate string that should contain no duplicate
// characters, and we've also kept track of the last index of each character
// that we used to form this candidate string
assert (use_index.size() == result.size());
// second pass: for each duplicate character, try each of its earlier indices
// and greedily persist only if a smaller (lexicographically) string
// can be formed
for (int i = 0; i < (int)input.size(); ++i)
{
char c = input[i];
if (use_index[c] == i)
{
// the current candidate string already uses this character at this index
continue;
}
int temp = use_index[c];
// tentatively try using a character from this index, instead
use_index[c] = i;
// and see if the string formed is lexicographically smaller
string candidate = _form_string(use_index);
if (candidate < result)
{
result = candidate;
}
else
{
// not smaller, discard that attempt and restore our use index
use_index[c] = temp;
}
}
return result;
}
int main()
{
vector<tuple<string, string>> tests{
tuple<string, string>{ "bcabc", "abc" },
tuple<string, string>{ "cbacdcbc", "acdb" },
};
for (auto &test : tests)
{
string &input = std::get<0>(test);
string &expected = std::get<1>(test);
string output = delete_dupes(input);
cout << input << " -> " << output << endl;
assert (output == expected);
}
return 0;
}
You'll destroy the input "cba" into "abc".
- Santoso.Wijaya September 10, 2015Because of floating point rounding errors.
When you come up with two lines: one with the slope 5.00000001, and another with the slope 5.000000099, they should be treated as the same line. But if you just hash the two floats, you'll get different hash values.
import java.util.*;
public class UniqueLines {
public static final double EPSILON = 0.0001;
private static final String FORMAT = "%.4f";
public static class Point {
public final double x;
public final double y;
public Point(double x, double y) {
this.x = x;
this.y = y;
}
public boolean isEquivalent(Point other) {
return (Math.abs(other.x - x) < EPSILON) &&
(Math.abs(other.y - y) < EPSILON);
}
}
public static class Line {
public final double slope; // in the case of vertical lines: POSITIVE_INFINITY
public final double intercept; // in the case of vertical lines: the X-intercept
public Line(double slope, double intercept) {
// normalize -0.0/0.0
if (Math.abs(slope - 0.0) < EPSILON) {
this.slope = 0.0;
}
else {
this.slope = slope;
}
// normalize -0.0/0.0
if (Math.abs(intercept - 0.0) < EPSILON) {
this.intercept = 0.0;
}
else {
this.intercept = intercept;
}
}
@Override
public boolean equals(Object other) {
if (other instanceof Line) {
Line otherLine = (Line) other;
if (slope == Double.POSITIVE_INFINITY && otherLine.slope != Double.POSITIVE_INFINITY ||
otherLine.slope == Double.POSITIVE_INFINITY && slope != Double.POSITIVE_INFINITY) {
return false;
}
else if (slope == Double.POSITIVE_INFINITY && otherLine.slope == Double.POSITIVE_INFINITY) {
return String.format(FORMAT, intercept).equals(String.format(FORMAT, otherLine.intercept));
}
else {
return String.format(FORMAT, slope).equals(String.format(FORMAT, otherLine.slope)) &&
String.format(FORMAT, intercept).equals(String.format(FORMAT, otherLine.intercept));
}
}
return false;
}
@Override
public int hashCode() {
if (slope == Double.POSITIVE_INFINITY) {
return String.format(FORMAT, intercept).hashCode();
}
else {
return String.format(FORMAT, slope).hashCode() |
String.format(FORMAT, intercept).hashCode();
}
}
}
public static int numberOfUniqueLines(Point[] points) {
HashSet<Line> lines = new HashSet<Line>();
for (int i = 0; i < points.length; i++) {
for (int j = 0; j < points.length; j++) {
if (i == j) continue;
if (points[i].isEquivalent(points[j])) continue;
double slope, intercept;
// check for vertical line
if (Math.abs((points[j].x - points[i].x)) < EPSILON) {
slope = Double.POSITIVE_INFINITY;
intercept = points[i].x;
}
else {
slope = (points[j].y - points[i].y) / (points[j].x - points[i].x);
intercept = points[i].y - slope * points[i].x;
}
Line line = new Line(slope, intercept);
lines.add(line);
}
}
return lines.size();
}
}
This breaks on:
1. Vertical lines. You'll be dividing by zero!
2. Hashing floating points. Double(0.00001).hashCode() != Double(0.0000100001).hashCode()
SEP = '\\'
PARENT = '..'
CURRENT = '.'
def normalize_path(input):
stack = []
if input.startswith(SEP):
stack.append('') # marker for the initial root directory
input = input[1:]
comps = input.split(SEP)
for comp in comps:
if comp == '':
continue
elif comp == CURRENT:
continue
elif comp == PARENT:
discard = stack.pop()
if discard == '':
raise Exception('Cannot go back further than the root directory!')
else:
stack.append(comp)
return SEP.join(stack)
if __name__ == '__main__':
for path in [
r'\a\b\..\foo.txt',
r'a\b\..\foo.txt',
r'\a\b\..\..\foo.txt',
r'\a\b\c\..\d\..\..\foo.txt',
r'\.\a\b\..\foo.txt',
r'a\.\b\..\foo.txt',
r'\a\b\.\..\..\foo.txt',
r'\a\.\.\b\c\..\d\..\..\.\foo.txt',
]:
print normalize_path(path)
"""
Output:
\a\foo.txt
a\foo.txt
\foo.txt
\a\foo.txt
\a\foo.txt
a\foo.txt
\foo.txt
\a\foo.txt
"""
Using dynamic programming approach, while also saving ALL the numbers that are found following the above constraints:
def solution(distance, digits):
"""Find how many numbers of length n are there such that each digit in such a number
is at least k smaller/greater than the number before and after it.
e.g. If (n == digits == 5) and (k == distance == 4), then, the numbers "39518" and "15951"
are both part of the solution set.
"""
n = digits
k = distance
if (n < 2):
raise ValueError('# of digits must be 2 or above!')
if (distance >= 10):
raise ValueError('Distance must be between 0 and 9!')
# Using a dynamic programming approach:
# let dp be a two-dimensional array following this property:
# dpk[n][p] = the set of numbers with `n` digits and prefix number `p` that satisfy the above constraint
# for a constant `k`.
dpk = {}
# we can construct the solution set for n = 2
for i in range(10): # i is the digit in the tens position
dpk[(2, i)] = set() # initialize dpk[2][i] to empty
for j in range(10): # j is the digit in the ones position
if abs(i - j) < k:
continue
# add all possible two-digit numbers "ij" such that i and j are k distance apart
dpk[(2, i)].add('{:d}{:d}'.format(i, j))
# we now have the base dpk[2][i] for all 1-digit prefixes filled in
# now, iteratively build our solution from n = 3 onward
# here, for 3 <= n <= digits:
#
# dpk[n][p] = Union{ {"pD" for all D in dpk[n-1][i] } for all i such that |i - p| >= k
#
for n in range(3, n+1):
for p in range(10): # build a solution for all prefixes in [0,9]
build = set()
dpk[(n,p)] = build
for i in range(10): # find all possible suffixes with a given prefix
if abs(i - p) < k:
continue
suffixes = dpk[ ((n-1),i) ]
for suffix in suffixes:
build.add('{:d}{:s}'.format(p, suffix))
return dpk
if __name__ == '__main__':
distance = 4
digits = 5
print 'Problem parameters . . .'
print '# of digits: {:d}'.format(digits)
print 'Distance between digits: {:d}'.format(distance)
dpk = solution(distance, digits)
# from the above, only collect dpk[n] where n == digits
keys = [(n, p) for (n, p) in dpk if (n == digits and p != 0)]
# ignoring prefix '0', collect all numbers from each prefix
sol_numbers = set()
for key in keys:
subset = dpk[key]
for n in subset:
sol_numbers.add(int(n))
print
print 'Solution . . .'
#print sol_numbers
print 'There are {:d} such numbers.'.format(len(sol_numbers))
##
## Output:
##
## $ python numdistance.py
## Problem parameters . . .
## # of digits: 5
## Distance between digits: 4
##
## Solution . . .
## There are 3224 such numbers.
##
Indeed. In Python, for instance:
import io
import sys
from collections import deque
class Node(object):
def __init__(self, word, prev=None):
assert word
self._word = word
self._prev = prev
@property
def word(self):
return self._word
@property
def prev(self):
return self._prev
def contains(self, word):
"""Return True if this word is contained in this node,
or in any of its parent nodes.
"""
node = self
while node is not None:
if node.word == word:
return True
node = node.prev
return False
def to_chain(self):
"""Return the (reversed) chain of this word, starting from
this node and followed by its parents, as a list of words.
"""
ret = []
node = self
while node is not None:
ret.append(node.word)
node = node.prev
return ret
def __str__(self):
return str(self.to_chain())
def find_word_ladder(word_set, start, end):
"""Given a set of words (the dictionary, or the universe set of words),
a start word and an end word, return a list that represents a "word ladder",
if any exists.
A word ladder is a series of words, each one is one character off from its
previous.
"""
# first things first, sanitize user input
word_set = { word.upper() for word in word_set }
start = start.upper()
end = end.upper()
if not word_set or not start or not end:
return []
if len(start) != len(end):
return []
if start == end:
return [start,]
# reduce the universe set to only be those words with equal lengths
word_set = { word for word in word_set if len(word) == len(start) }
# remove the starting word
word_set.remove(start)
# now, prepare a BFS scratch spaces
q = deque()
q.append(Node(start))
terminus = None
while q:
node = q.popleft()
# generate all possible descendent words in the remaining universe set
# from this node's word
for word in word_set:
# the "children" of this node involve all words that
# have the "diff/distance" of 1 AND not already existing
# in this node's chain of words
if word == node.word:
continue
if _diff(word, node.word) == 1:
# found a child word of this node; create a child node
# based on this word, and linked to the currently examined node
child = Node(word, node)
if word == end:
# this is the earliest time where we can find the terminus!
terminus = child
break
elif not node.contains(word):
# cycle breaker!
q.append(Node(word, node))
if terminus is not None:
break
if terminus is None:
return []
# we found the terminus node! we can work backward from there to
# reconstruct the word ladder
return [word for word in reversed(terminus.to_chain())]
def _diff(word1, word2):
"""Given two words, return the number of differing characters in each
character position.
"""
assert len(word1) == len(word2)
diff = 0
for i, c in enumerate(word1):
if word2[i] != c:
diff += 1
assert diff != 0
return diff
def main(args):
wordfile, start, end = args[1:]
all_words = set()
# prepare a set (unique) of words from a dictionary file;
# make this set of words case-insensitive by transforming
# the input words to all UPPERCASE
with io.open(wordfile, 'r') as f:
for line in f:
for word in line.split():
all_words.add(word.upper())
ladder = find_word_ladder(all_words, start, end)
print repr(ladder)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv))
# e.g.:
# python wordladder.py /usr/share/dict/words FOOL WARM
# ['FOOL', u'WOOL', u'WOOD', u'WORD', u'WARD', u'WARM']
Can't come up with a more efficient solution than the dynamic programming approach, but as an alternative, you can model the problem as a graph problem. More specifically, by building one or more acyclic directed graphs in this manner:
1. For each word, associate it with a graph node.
2. For each word, generate the list of words that can be derived from it based on the problem's rule (i.e. take one character out from each character position). Compare these to the rest of the words. For each match, link the node together, with the parent being the longer word.
3. Once you've passed through every word in (2), you'll have one or more ADGs and a list of nodes from said ADGs, but no knowledge of "root" nodes. But this doesn't matter.
4. For each node, compute its "longest distance to a leaf node"; you can use a cache of nodes:longest-distance to avoid doing duplicate computation for a given node.
The answer is the biggest number you find while iterating through nodes in step (4).
In Java:
import java.util.*;
/**
* Given a list of words, you can select any one word and remove a character from
* it such that it becomes another word in the library. Selected word is then discarded.
* You can then do the same with the new word in the latter.
*
* Now, given an arbitrary library of words, what is the longest chain of words you can
* play this game with?
*
* e.g.:
* For the library of words: ['a', 'aa', 'bb', 'bbc', 'cbbc'], the longest chain is 3 words
* because 'cbbc' -> 'bbc' -> 'bb' is the longest chain you can make out of the input set.
*
* (NOTE) This solution uses acyclic directed graph build-and-search method, and assumes
* or eliminates duplicate words.
*/
public class WordChainADG {
// the list of input words, in no particular order
private List<String> words;
public WordChainADG() {
}
public int longestChain(List<String> words) {
// initialize our scratch spaces
words = new ArrayList<String>(words);
HashMap<String, Node> nodes = new HashMap<String, Node>(); // mapping each word to a node
HashMap<Node, Integer> chainLengths = new HashMap<Node, Integer>(); // mapping each node to its longest chain length
int n = words.size();
// initialize the maps: create node for each word
for (String word : words) {
nodes.put(word, new Node(word));
}
// and a count of longest-chain-length for each node
for (String word : nodes.keySet()) {
Node node = nodes.get(word);
chainLengths.put(node, 0); // 0 indicates not yet determined
}
// build the (ADG) graphs
for (int i = 0; i < n; i++) {
String word = words.get(i);
int wordSize = word.length();
// try removing each character in the current word, and see if it matches any of the shorter words
for (int j = 0; j < wordSize; j++) {
String tempstr = trimCharacterAt(word, j);
Node node = nodes.get(tempstr);
if (node != null) {
// such a word exists! add the associated node to this word's node children
nodes.get(word).addChild(node);
}
}
}
// now we have 1 or more (ADG) graphs whose edges point from longer words to shorter words
// we need to find the maximum path in each graph, and then find the maximum of those maximum paths
int answer = 0;
// each node can be thought of as the root of a tree/ADG; so all we have to do is to find
// the maximum-path-to-a-leaf of each "root" and record the biggest number seen
HashSet<Node> allNodes = new HashSet<Node>(chainLengths.keySet());
for (Node node : allNodes) {
int longestPath = findLongestPath(node, chainLengths);
chainLengths.put(node, longestPath);
if (longestPath > answer) {
answer = longestPath;
}
}
System.out.println(chainLengths);
return answer;
}
// given a string, and an index within that string, take out
// the character on that index, and return the resulting string
private static String trimCharacterAt(String input, int index) {
if (index < 0 || index >= input.length()) {
throw new IndexOutOfBoundsException();
}
char[] astr = new char[input.length() - 1];
for (int i = 0, j = 0; i < input.length(); i++) {
if (i == index)
continue;
astr[j++] = input.charAt(i);
}
return new String(astr);
}
// a simple graph node in an ADG; associated with a word, and
// can have 0-N children (pointees of its outgoing edges)
private class Node {
private String word;
private ArrayList<Node> children;
public Node(String word) {
this.word = word;
children = new ArrayList<Node>();
}
public List<Node> getChildren() {
return children;
}
public void addChild(Node child) {
children.add(child);
}
@Override
public String toString() {
return "<Node(\"" + word + "\")>";
}
}
// given a node, find the longest path to a leaf;
// since we can assume that, starting from `node` the graph is ADG, we don't have to worry
// about cycles; additionally once the longest path of a node is computed, we can update
// this value in the cache given (`longestPaths`) so that the same computation for a given
// node is not repeated
private static int findLongestPath(Node node, Map<Node, Integer> longestPaths) {
int computed = longestPaths.get(node);
if (computed != 0) {
// already determined
return computed;
}
List<Node> children = node.getChildren();
// base case
if (children.size() == 0) {
longestPaths.put(node, 1);
return 1;
}
// recursive case
int childPath = 0;
for (Node child : node.getChildren()) {
int childLongestPath = findLongestPath(child, longestPaths);
if (childLongestPath > childPath) {
childPath = childLongestPath;
}
}
assert (childPath > 0);
longestPaths.put(node, 1 + childPath);
return 1 + childPath;
}
public static void main(String[] args) {
List<String> words = new ArrayList<String>();
Scanner input = new Scanner(System.in);
while (input.hasNext()) {
String word = input.next();
words.add(word);
}
int answer = new WordChainADG().longestChain(words);
System.out.println("Longest chain: " + answer);
}
}
Input: "cbac"
- Santoso.Wijaya September 12, 2015Your program will output: "abc", which is not correct.