One way could be to sort a and b first; then, for each b[i], find the closest element in a — call it a[j_i] — and throw away the elements of a smaller than a[j_i] (i.e. a = a[j_i:]) before repeating for b[i+1]. Use whatever algorithm you want to find the closest element in a to a given value:
a = [1, 4, 11, 20, 25]
b = [3, 10, 20]
a_tmp = sorted(a)
# Sort `b` while remembering where each value came from, so each result can
# be written back to its original position.
b_tmp = sorted(enumerate(b), key=lambda pair: pair[1])
# Pre-size the output list; it is filled by original index, not left to right.
out = [None] * len(b)
for i, v in b_tmp:
    # Find the position of the element of `a_tmp` closest to `v`, then drop
    # everything before it: all remaining values of `b` are >= `v`, so no
    # discarded element can be the closest match for any of them.
    j = min(range(len(a_tmp)), key=lambda k: abs(a_tmp[k] - v))
    a_tmp = a_tmp[j:]
    out[i] = a_tmp[0]
print(out)
The 'brute force' method list(map(lambda y: min(a, key=lambda x: abs(x-y)), b)) (from this answer) will be faster for small a and b lists (around len(a)=10, len(b)=5), since (I believe) it doesn't carry the overhead of first sorting the input lists.
Timings:
import random
param = 10000
# Random test data: `a` has `param` elements, `b` is 100x smaller; values are
# drawn uniformly from a range 200x wider than `a` is long.
a = [random.randint(-100 * param, 100 * param) for _ in range(param)]
b = [random.randint(-100 * param, 100 * param) for _ in range(param // 100)]
def f(a, b):
    """Brute force: for every element of `b`, scan all of `a` for the value
    with the smallest absolute difference (first such value on a tie)."""
    return list(map(lambda y: min(a, key=lambda x: abs(x - y)), b))
def g(a, b):
    """Sort-and-trim nearest-neighbour search.

    For each b[i] (visited in ascending order) find the element of `a`
    closest to it, then discard the elements of `a` below that match: they
    cannot be closer to any later (larger) value of `b`.  Results are
    returned in the original order of `b`.

    Fix over the original: the closest element is now located with a binary
    search, O(log n), instead of a linear `min` scan, O(n), which defeated
    the point of sorting `a`; trimming is done with an index instead of
    re-slicing, avoiding an O(n) copy per iteration.  Ties (a value of `b`
    equidistant from two elements of `a`) still resolve to the smaller
    element, exactly as the linear scan did.
    """
    from bisect import bisect_left

    a = sorted(a)
    ib = sorted(enumerate(b), key=lambda x: x[1])
    out = [None] * len(b)
    lo = 0  # index into `a` below which everything has been discarded
    for i, b_i in ib:
        j = bisect_left(a, b_i, lo)
        # Candidates: a[j-1] (largest element < b_i) and a[j] (smallest
        # element >= b_i).  Pick the closer one, preferring the smaller
        # element on a tie to match the linear scan's first-match rule.
        if j > lo and (j == len(a) or b_i - a[j - 1] <= a[j] - b_i):
            j -= 1
        lo = j
        out[i] = a[lo]
    return out
%timeit f(a,b)
%timeit g(a,b)
285 ms ± 26.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
172 ms ± 7.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Using NumPy
import random
import numpy as np
param = 10000
# Same random test data as before: `a` has `param` elements, `b` is 100x
# smaller, values drawn from [-100*param, 100*param].
a = [random.randint(-100 * param, 100 * param) for _ in range(param)]
b = [random.randint(-100 * param, 100 * param) for _ in range(param // 100)]
# `f_np()` and `g_np()` expect regular Python lists as arguments and convert them
# to numpy arrays internally, returning the results as Python lists.
def f_np(a, b):  # from https://stackoverflow.com/a/52798995/8944057
    """Brute force with NumPy: one vectorized distance scan over `a` per
    element of `b` (first minimum wins on a tie)."""
    arr = np.array(a)
    result = []
    for target in np.array(b):
        result.append(arr[np.abs(arr - target).argmin()])
    return result
def g_np(a, b):
    """NumPy version of the sort-and-trim search: visit `b`'s values in
    ascending order, and after each match discard the elements of the
    sorted `a` that precede it — they cannot be closer to any larger
    value of `b`.  Results are stored at `b`'s original positions."""
    remaining = np.sort(a)
    out = [None] * len(b)
    for i in np.argsort(b):
        nearest = np.abs(remaining - b[i]).argmin()
        remaining = remaining[nearest:]
        out[i] = remaining[0]
    return out
%timeit f_np(a,b)
%timeit g_np(a,b)
3.47 ms ± 105 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.82 ms ± 149 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
This is a lot faster — even the equivalent 'brute force' method is many times faster than its pure-Python counterpart. But if you're going to open yourself up to other libraries then also see this answer.