def get_word_frequencys(words):
"""given a list of words, returns a dictionary of the words,
and their frequencys"""
words_and_freqs = {}
for word in words:
words_and_freqs[word] = words.count(word)
return words_and_freqs
The above function works fine for small files, however, I need it to work on a file 264505 words long, currently, my program takes several minutes for files this size.
How can I construct a dictionary in a more efficient way?
all relevent code:
def main(words):
"""
given lots of words do things
"""
words_and_frequencys = get_word_frequencys(words)
print("loaded ok.")
print()
print_max_frequency(words, words_and_frequencys)
def get_word_frequencys(words):
"""given a list of words, returns a dictionary of the words,
and their frequencys"""
words_and_freqs = {}
for word in words:
words_and_freqs[word] = words.count(word)
return words_and_freqs
def print_max_frequency(words, words_and_frequencys):
"""given a dict of words and their frequencys,
prints the max frequency of any one word"""
max_frequency = 0
for word in words:
if words_and_frequencys.get(word) > max_frequency:
max_frequency = words_and_frequencys.get(word)
print(" " + "Maximum frequency = {}".format(max_frequency))
note for those suggesting Counter instead of Count(), I'm not allowed to import any modules apart from os and re.