This is the code am working on and I want the output as the count in descending and if the count is same then order by name.
from collections import Counter
import re
from nltk.corpus import stopwords
import operator
text = "The quick brown fox jumped over the lazy dogs bowl. The dog was angry with the fox considering him lazy."
def tokenize(text):
tokens = re.findall(r"\w+|\S", text.lower())
#print(tokens)
tokens1 = []
for i in tokens:
x = re.findall(r"\w+|\S", i, re.ASCII)
for j in x:
tokens1.append(j)
return tokens
tok = tokenize(text)
punctuations = ['(',')',';',':','[',']',',', '...', '.', '&']
keywords = [word for word in tok if not word in punctuations]
cnt = Counter()
d= {}
for word in keywords:
cnt[word] += 1
print(cnt)
freq = operator.itemgetter(1)
for k, v in sorted(cnt.items(), reverse=True, key=freq):
print("%3d %s" % (v, k))
Current output:
4 the
2 fox
2 lazy
1 quick
1 brown
1 jumped
1 over
1 dogs
1 bowl
1 dog
1 was
1 angry
1 with
1 considering
1 him
Required output:
4 the
2 fox
2 lazy
1 angry
1 bowl
1 brown
1 considering
1 dog
1 dogs
etc.