# Jaccard similarity between every pair of tag lists across two dictionaries.
# Result: d[score_key][user_key] = |A & B| / |A | B|.
from __future__ import division  # MUST be the first statement in the file (SyntaxError otherwise)

import collections  # kept from the original file; no longer strictly needed after this rewrite

Scorelife_disct = {
    'scorelife41': ['c', 'hindi', 'sql', 'scala', 'love'],
    'scorelife42': ['c', 'sql', 'english', 'Cat', 'html', 'cPlus', 'love'],
    'scorelife43': ['c', 'Cat', 'friend', 'love'],
}
User_life_disct = {
    'scorelifeLife1': ['c', 'hindi', 'python', 'scala', 'graphics'],
    'scorelifeLife10': ['c', 'hindi', 'perl'],
    'scorelifeLife11': ['hindi', 'perl', 'spark'],
}

# Performance fix: the original rebuilt set(u1) and set(u2) FOUR times per
# inner-loop iteration, so each pair cost O(len(u1) + len(u2)) just in set
# construction.  Convert every list to a frozenset exactly ONCE, up front;
# the double loop then only pays for the intersection/union itself.
score_sets = {key: frozenset(tags) for key, tags in Scorelife_disct.items()}
user_sets = {key: frozenset(tags) for key, tags in User_life_disct.items()}

# Nested dict comprehension replaces defaultdict + explicit loops.
# The (s or u) guard avoids ZeroDivisionError when both lists are empty
# (Jaccard similarity of two empty sets is defined here as 0.0).
d = {
    score_key: {
        user_key: (len(s & u) / len(s | u)) if (s or u) else 0.0
        for user_key, u in user_sets.items()
    }
    for score_key, s in score_sets.items()
}

print(d)  # print() is valid on both Python 2 and Python 3
Output:
{'scorelife41': {'scorelifeLife1': 0.42857142857142855,
'scorelifeLife10': 0.3333333333333333,
'scorelifeLife11': 0.14285714285714285}}
I am computing the Jaccard similarity between every pair of lists across two Python dictionaries. The output is correct, but the program takes too long on a large amount of data. How can I reduce the time complexity so that it runs faster?