Instead of turning the dictionary into a full graph, use something with a little less structure:
For each word
in the dictionary, you get a shortened_word
by deleting character number i
for each i
in len(word)
. Map the pair (shortened_word, i)
to a list of all the word
s.
This helps looking up all words with one replaced letter (because they must be in the same (shortened_word, i)
bin for some i
, and words with one more letter (because they must be in some (word, i)
bin for some i
.
The Python code:
from collections import defaultdict, deque
from itertools import chain
def shortened_words(word):
for i in range(len(word)):
yield word[:i] + word[i + 1:], i
def prepare_graph(d):
g = defaultdict(list)
for word in d:
for short in shortened_words(word):
g[short].append(word)
return g
def walk_graph(g, d, start, end):
todo = deque([start])
seen = {start: None}
while todo:
word = todo.popleft()
if word == end: # end is reachable
break
same_length = chain(*(g[short] for short in shortened_words(word)))
one_longer = chain(*(g[word, i] for i in range(len(word) + 1)))
one_shorter = (w for w, i in shortened_words(word) if w in d)
for next_word in chain(same_length, one_longer, one_shorter):
if next_word not in seen:
seen[next_word] = word
todo.append(next_word)
else: # no break, i.e. not reachable
return None # not reachable
path = [end]
while path[-1] != start:
path.append(seen[path[-1]])
return path[::-1]
And the usage:
dictionary = ispell_dict # list of 47158 words
graph = prepare_graph(dictionary)
print(" -> ".join(walk_graph(graph, dictionary, "hands", "feet")))
print(" -> ".join(walk_graph(graph, dictionary, "brain", "game")))
Output:
hands -> bands -> bends -> bents -> beets -> beet -> feet
brain -> drain -> drawn -> dawn -> damn -> dame -> game
A word about speed: building the 'graph helper' is fast (1 second), but hands -> feet takes 14 seconds, and brain --> game takes 7 seconds.
Edit: If you need more speed, you can try using a graph or network library. Or you actually build the full graph (slow) and then find paths much faster. This mostly consists of moving the look-up of edges from the walking function to the graph-building function:
def prepare_graph(d):
g = defaultdict(list)
for word in d:
for short in shortened_words(word):
g[short].append(word)
next_words = {}
for word in d:
same_length = chain(*(g[short] for short in shortened_words(word)))
one_longer = chain(*(g[word, i] for i in range(len(word) + 1)))
one_shorter = (w for w, i in shortened_words(word) if w in d)
next_words[word] = set(chain(same_length, one_longer, one_shorter))
next_words[word].remove(word)
return next_words
def walk_graph(g, start, end):
todo = deque([start])
seen = {start: None}
while todo:
word = todo.popleft()
if word == end: # end is reachable
break
for next_word in g[word]:
if next_word not in seen:
seen[next_word] = word
todo.append(next_word)
else: # no break, i.e. not reachable
return None # not reachable
path = [end]
while path[-1] != start:
path.append(seen[path[-1]])
return path[::-1]
Usage: Build the graph first (slow, all timings on some i5 laptop, YMMV).
dictionary = ispell_dict # list of 47158 words
graph = prepare_graph(dictionary) # more than 6 minutes!
Now find the paths (much faster than before, times without printing):
print(" -> ".join(walk_graph(graph, "hands", "feet"))) # 10 ms
print(" -> ".join(walk_graph(graph, "brain", "game"))) # 6 ms
print(" -> ".join(walk_graph(graph, "tampering", "crunchier"))) # 25 ms
Output:
hands -> lands -> lends -> lens -> lees -> fees -> feet
brain -> drain -> drawn -> dawn -> damn -> dame -> game
tampering -> tapering -> capering -> catering -> watering -> wavering -> havering -> hovering -> lovering -> levering -> leering -> peering -> peeping -> seeping -> seeing -> sewing -> swing -> swings -> sings -> sines -> pines -> panes -> paces -> peaces -> peaches -> beaches -> benches -> bunches -> brunches -> crunches -> cruncher -> crunchier