Here is a comparison of three methods. The with_intersection
method allows for repeated values within each list, the other two do not. The test considers two sorted lists, each with one million distinct integers.
The using_sorted
method takes advantage of the fact that both lists are sorted and does not use sets. At the same time, it is the slowest, most verbose and error-prone.
import numpy as np # only for data generation
lst1 = np.random.randint(1, 20, 10**6).cumsum().tolist()
lst2 = np.random.randint(1, 20, 10**6).cumsum().tolist()
def with_intersection(lst1, lst2):
common = set(lst1).intersection(lst2)
res1 = [x for x in lst1 if x not in common]
res2 = [x for x in lst2 if x not in common]
return res1, res2
def set_then_sort(foo, bar):
_foo = set(foo)
_bar = set(bar)
in_foo = _foo - _bar
in_bar = _bar - _foo
return sorted(in_foo), sorted(in_bar)
def using_sorted(lst1, lst2):
res1 = list()
res2 = list()
n1 = len(lst1)
n2 = len(lst2)
i = j = 0
while True:
while i < n1 and j < n2 and lst1[i] < lst2[j]:
res1.append(lst1[i])
i += 1
while j < n2 and i < n1 and lst1[i] > lst2[j]:
res2.append(lst2[j])
j += 1
while i < n1 and j < n2 and lst1[i] == lst2[j]:
i += 1
j += 1
if i == n1:
res2.extend(lst2[j:])
break
elif j == n2:
res1.extend(lst1[i:])
break
return res1, res2
assert with_intersection(lst1, lst2) == set_then_sort(lst1, lst2) == using_sorted(lst1, lst2)
# %timeit with_intersection(lst1, lst2) # 306 ms
# %timeit set_then_sort(lst1, lst2) # 491 ms
# %timeit using_sorted(lst1, lst2) # 870 ms