The sortednp package implements an efficient merge of sorted numpy-arrays:
import numpy as np
import sortednp
a = np.array([1,3,5])
b = np.array([2,4,6])
c = sortednp.merge(a, b) # c == np.array([1,2,3,4,5,6])
Inspired by Sander's post, I measured numpy's mergesort (v1.17.4), Sander's answer, and sortednp (v0.2.1) for different array-sizes and ratios of the sizes between a and b using the following code:
from timeit import timeit as t
import sortednp as snp
import numpy as np
def numpy_mergesort(a, b):
c = np.concatenate((a,b))
c.sort(kind='mergesort')
return c
def sanders_merge(a, b):
if len(a) < len(b):
b, a = a, b
c = np.empty(len(a) + len(b), dtype=a.dtype)
b_indices = np.arange(len(b)) + np.searchsorted(a, b)
a_indices = np.ones(len(c), dtype=bool)
a_indices[b_indices] = False
c[b_indices] = b
c[a_indices] = a
return c
results = []
for size_factor in range(3):
for max_digits in range(3, 8):
size = 10**max_digits
# size difference of a factor 10 here makes the difference!
a = np.arange(size // 10**size_factor, dtype=np.int)
b = np.arange(size, dtype=np.int)
assert np.array_equal(numpy_mergesort(a, b), sanders_merge(a, b))
assert np.array_equal(numpy_mergesort(a, b), snp.merge(a, b))
classic = t(lambda: numpy_mergesort(a, b), number=10)
sanders = t(lambda: sanders_merge(a, b), number=10)
snp_result = t(lambda: snp.merge(a, b), number=10)
results.append((size_factor, max_digits, classic, sanders, snp_result))
text_format = " ".join(["{:<18}"] * 5)
print(text_format.format("log10(size factor)", "log10(max size)", "np mergesort", "Sander's merge", "sortednp"))
table_format = " ".join(["{:.5f}"] * 5)
for result in results:
print(table_format.format(*result))
The results show that sortednp consistently is the fastest implementation:
log10(size factor) log10(max size) np mergesort Sander's merge sortednp
0.00000 3.00000 0.00016 0.00062 0.00005
0.00000 4.00000 0.00135 0.00469 0.00029
0.00000 5.00000 0.01160 0.03813 0.00292
0.00000 6.00000 0.14952 0.54160 0.03527
0.00000 7.00000 2.00566 5.91691 0.67119
1.00000 3.00000 0.00005 0.00017 0.00002
1.00000 4.00000 0.00019 0.00058 0.00014
1.00000 5.00000 0.00304 0.00633 0.00137
1.00000 6.00000 0.03743 0.06893 0.01828
1.00000 7.00000 0.62334 1.01523 0.38732
2.00000 3.00000 0.00004 0.00015 0.00002
2.00000 4.00000 0.00012 0.00028 0.00013
2.00000 5.00000 0.00217 0.00275 0.00122
2.00000 6.00000 0.03457 0.03205 0.01524
2.00000 7.00000 0.51307 0.50120 0.34335