The solution below might perform better for large lists but might also need more memory due to the sorting step.
The intersection can either be done over a defined sortKey e.g. 'count' or the the hash of the dictionary will be used as suggested by https://stackoverflow.com/a/60765557/1497139. The algorithm sorts the two lists and iterates in parallel checking for the three possible states of the two iterators:
- first iterator behind second -> progress first iterator
- second iterator behind first -> progress second iterator
- both at same position -> found an intersection element
In the given example using the "count" field as a sortKey gives the same reuslt as using the dict hashes as keys.
[{'count': 351, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'}, {'count': 359, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'}]
[{'count': 351, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'}, {'count': 359, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'}]
python unit test
def testIntersection(self):
'''
test creating the intersection of a list of dictionaries
'''
list1 = [{'count': 351, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 332, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 336, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 359, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 309, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'}]
list2 = [{'count': 359, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 351, 'evt_datetime': datetime.datetime(2015, 10, 23, 8, 45), 'att_value': 'red'},
{'count': 381, 'evt_datetime': datetime.datetime(2015, 10, 22, 8, 45), 'att_value': 'red'}]
listi=ListOfDict.intersect(list1, list2,'count')
print(listi)
self.assertEquals(2,len(listi))
listi=ListOfDict.intersect(list1, list2)
print(listi)
self.assertEquals(2,len(listi))
ListOfDict
'''
Created on 2020-08-11
@author: wf
'''
class ListOfDict(object):
'''
https://stackoverflow.com/questions/33542997/python-intersection-of-2-lists-of-dictionaries/33543164
'''
@staticmethod
def sortKey(d,key=None):
''' get the sort key for the given dict d with the given key
'''
if key is None:
# https://stackoverflow.com/a/60765557/1497139
return hash(tuple(d.items()))
else:
return d[key]
@staticmethod
def intersect(listOfDict1,listOfDict2,key=None):
'''
get the intersection lf the two lists of Dicts by the given key
'''
i1=iter(sorted(listOfDict1, key=lambda k: ListOfDict.sortKey(k, key)))
i2=iter(sorted(listOfDict2, key=lambda k: ListOfDict.sortKey(k, key)))
c1=next(i1)
c2=next(i2)
lr=[]
while True:
try:
val1=ListOfDict.sortKey(c1,key)
val2=ListOfDict.sortKey(c2,key)
if val1<val2:
c1=next(i1)
elif val1>val2:
c2=next(i2)
else:
lr.append(c1)
c1=next(i1)
c2=next(i2)
except StopIteration:
break
return lr