Use a Trie. There are several python libraries, such as Marisa-trie. Or see this stack overflow answer to create your own How to create a TRIE in Python. Increment a counter each time a new word is added to the Trie.
Here's a simple nested dictionary implementation. It keeps track of the total number of words and the number of each word.
END = 'end'
class Trie:
def __init__(self, words_iterable):
self.root = {}
self.size = 0
for word in iter(words_iterable):
self.insert(word)
def insert(self, word):
current_dict = self.root
for letter in word:
current_dict = current_dict.setdefault(letter, {})
if END not in current_dict:
current_dict[END] = 0
self.size += 1
current_dict[END] += 1
def count(self, word):
current_dict = self.root
for letter in word:
current_dict = current_dict.setdefault(letter, {})
return current_dict.get(END, 0)
def __len__(self):
return self.size
def __str__(self):
return str(self.root)
Examples:
trie = Trie('one two one three four'.split())
trie.insert('four')
print(trie)
>>> {'o': {'n': {'e': {'end': 2}}}, 't': {'w': {'o': {'end': 1}}, 'h': {'r':
{'e': {'e': {'end': 1}}}}}, 'f': {'o': {'u': {'r': {'end': 2}}}}}
len(trie)
>>> 4
trie.count('four')
>>> 2