If you are currently using basic loops for your calculations, you can likely get much better performance without changing your algorithm, simply by restructuring how the calculations are performed.
I have run a series of benchmarks to illustrate how different approaches affect performance. For simplicity, the benchmarks use plain x/y coordinates rather than the Haversine formula or something equivalent, but the same principles apply (see the Haversine sketch after the benchmark code).
There are two key ideas in the benchmark progression. First, simplify the calculation where possible: the fewer operations, the less time it takes. Second, move loop calculations into NumPy, which performs vector operations with far lower per-element overhead than a Python loop.
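As a minimal sketch of both ideas together (the data and variable names here are made up purely for illustration):

```python
import numpy as np

# hypothetical data: four points, one reference point (bx, by), radius r
xs = np.array([0.0, 1.0, 2.0, 3.0])
ys = np.array([0.0, 1.0, 0.5, 3.0])
bx, by, r = 1.0, 1.0, 1.5

# loop version: Python-level iteration and a square root for every point
inside_loop = [i for i in range(len(xs))
               if ((xs[i] - bx) ** 2 + (ys[i] - by) ** 2) ** 0.5 <= r]

# simplified and vectorised: compare squared distances to r**2 in one C-level pass
dx, dy = xs - bx, ys - by
inside_vec = np.nonzero(dx * dx + dy * dy <= r * r)[0]

assert inside_loop == list(inside_vec)
```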
The performance of the different approaches (Time is the total in seconds for 10 runs over 10 million points):
| Method | Time (s) | Relative |
|--------|----------|----------|
| Basic loop method | 45.039 | 22.68 |
| Checking against r squared removes square root operation | 28.941 | 14.58 |
| Using list comprehension speeds construction of list | 26.190 | 13.19 |
| Using 1d arrays means loops are performed in C vector operations | 1.986 | 1.00 |
| Using 2d numpy arrays can help with memory locality, but not in this case | 3.452 | 1.74 |
These results come from the following test code:

```python
import numpy as np
import random
import timeit

random.seed(1)

count_a = 10_000_000  # number of random points
r = 2.5               # search radius around point b
r_squared = r * r

# list of (x, y) tuples plus a single reference point b
a_tuples = [(random.uniform(0, 10), random.uniform(0, 10)) for _ in range(count_a)]
b = (random.uniform(4, 8), random.uniform(4, 8))

# tuple of 1d numpy arrays (all x values, all y values)
a_arrs = (np.array([v[0] for v in a_tuples]), np.array([v[1] for v in a_tuples]))

# 2d numpy array of shape (count_a, 2)
a_2d = np.array(a_tuples)
b_arr = np.array(b)


def method0():
    """Basic loop method"""
    out = []
    for x, y in a_tuples:
        dx = x - b[0]
        dy = y - b[1]
        distance = ((dx * dx) + (dy * dy)) ** 0.5
        if distance <= r:
            out.append((x, y))
    return out


def method1():
    """Checking against r squared removes square root operation"""
    out = []
    for x, y in a_tuples:
        dx = x - b[0]
        dy = y - b[1]
        if ((dx * dx) + (dy * dy)) <= r_squared:
            out.append((x, y))
    return out


def method2():
    """Using list comprehension speeds construction of list"""
    # the walrus operator (:=) computes each difference exactly once per point
    return [
        (x, y)
        for x, y in a_tuples
        if (((dx := x - b[0]) * dx) + ((dy := y - b[1]) * dy)) <= r_squared
    ]


def method3():
    """Using 1d arrays means loops are performed in C vector operations"""
    x, y = a_arrs
    dx = x - b[0]
    dy = y - b[1]
    # boolean mask of points inside the radius -> array of matching indices
    return np.nonzero(((dx * dx) + (dy * dy)) <= r_squared)[0]


def method4():
    """Using 2d numpy arrays can help with memory locality, but not in this case"""
    a_b = a_2d - b_arr
    a_b2 = a_b * a_b
    sum_sq = np.sum(a_b2, axis=1)  # sum dx**2 + dy**2 across each row
    return np.nonzero(sum_sq <= r_squared)[0]


# confirm all five methods select the same points
m0 = method0()
assert m0 == method1()
assert m0 == method2()
assert m0 == [(a_arrs[0][i], a_arrs[1][i]) for i in method3()]
assert m0 == [(a_2d[i][0], a_2d[i][1]) for i in method4()]

number = 10
t0 = timeit.timeit(method0, number=number)
t1 = timeit.timeit(method1, number=number)
t2 = timeit.timeit(method2, number=number)
t3 = timeit.timeit(method3, number=number)
t4 = timeit.timeit(method4, number=number)
tmin = min((t0, t1, t2, t3, t4))

print('| Method | Time (s) | Relative |')
print('|--------|----------|----------|')
print(f'| {method0.__doc__} | {t0:.3f} | {t0 / tmin:.2f} |')
print(f'| {method1.__doc__} | {t1:.3f} | {t1 / tmin:.2f} |')
print(f'| {method2.__doc__} | {t2:.3f} | {t2 / tmin:.2f} |')
print(f'| {method3.__doc__} | {t3:.3f} | {t3 / tmin:.2f} |')
print(f'| {method4.__doc__} | {t4:.3f} | {t4 / tmin:.2f} |')
```
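The same restructuring carries over to the Haversine case. Below is a sketch, not the benchmark's own code: the function name, constant, and sample data are mine, but the formula is the standard Haversine.

```python
import numpy as np

EARTH_RADIUS_KM = 6371.0  # mean Earth radius, a common approximation

def haversine_km(lats, lons, lat0, lon0):
    """Great-circle distances (km) from arrays of points to a single point."""
    lats, lons = np.radians(lats), np.radians(lons)
    lat0, lon0 = np.radians(lat0), np.radians(lon0)
    h = (np.sin((lats - lat0) / 2) ** 2
         + np.cos(lats) * np.cos(lat0) * np.sin((lons - lon0) / 2) ** 2)
    return 2 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(h))

# hypothetical data: find indices of points within r_km of (lat0, lon0)
lats = np.array([51.5, 48.9, 40.7])
lons = np.array([-0.1, 2.4, -74.0])
lat0, lon0, r_km = 50.0, 0.0, 500.0
idx = np.nonzero(haversine_km(lats, lons, lat0, lon0) <= r_km)[0]
```

The square-root trick has an analogue here too: the distance is monotonic in the intermediate value `h`, so you can compare `h` directly against `np.sin(r_km / (2 * EARTH_RADIUS_KM)) ** 2` and skip the `arcsin` and `sqrt` entirely.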