I'm working on a large code and I find myself in the need to speed up a specific bit of it. I've created a MWE
shown below:
import numpy as np
import time
def random_data(N):
# Generate some random data.
return np.random.uniform(0., 10., N).tolist()
# Lists that contain all the data.
list1 = [random_data(10) for _ in range(1000)]
list2 = [random_data(1000), random_data(1000)]
# Start taking the time.
tik = time.time()
list4 = []
# Loop through all elements in list1.
for elem in list1:
list3 = []
# Loop through elements in list2.
for elem2 in zip(*list2):
A = np.exp(-0.5*((elem[0]-elem2[0])/elem[3])**2)
B = np.exp(-0.5*((elem[1]-elem2[1])/elem[3])**2)
list3.append(A*B)
# Sum elements in list3 and append result to list4.
sum_list3 = sum(list3) if sum(list3)>0. else 1e-06
list4.append(sum_list3)
# Print the elapsed time.
print time.time()-tik
The weird format of list1
and list2
is because that's how this block of code receives them.
The obvious part where most of the time is spent is in the recursive calculation of the A
and B
terms.
Is there any way I could speed up this block of code without having to parallelize it (I've tried it before and it gave me a lot of troubles)? I'm open to use any package, numpy
, scipy
, etc..
Add
This is the result of applying abarnert's optimizations and also Jaime's advise to make only one exponentiation. The optimized function is on average ~60x faster on my system.
import numpy as np
import timeit
def random_data(N):
return np.random.uniform(0., 10., N).tolist()
# Lists that contain all the data.
list1 = [random_data(10) for _ in range(1000)]
list2 = [random_data(1000), random_data(1000)]
array1 = np.array(list1)
array2 = np.array(zip(*list2))
# Old non-optimezed function.
def func1():
list4 = []
# Process all elements in list1.
for elem in list1:
# Process all elements in list2.
list3 = []
for elem2 in zip(*list2):
A = np.exp(-0.5*((elem[0]-elem2[0])/elem[3])**2)
B = np.exp(-0.5*((elem[1]-elem2[1])/elem[3])**2)
list3.append(A*B)
# Sum elements in list3 and append result to list4.
sum_list3 = sum(list3) if sum(list3)>0. else 1e-06
list4.append(sum_list3)
# New optimized function.
def func2():
list4 = []
# Process all elements in list1.
for elem in array1:
# Broadcast over elements in array2.
A = -0.5*((elem[0]-array2[:,0])/elem[3])**2
B = -0.5*((elem[1]-array2[:,1])/elem[3])**2
array3 = np.exp(A+B)
# Sum elements in array3 and append result to list4.
sum_list3 = max(array3.sum(), 1e-10)
list4.append(sum_list3)
# Get time for both functions.
func1_time = timeit.timeit(func1, number=10)
func2_time = timeit.timeit(func2, number=10)
# Print hom many times faster func2 is versus func1.
print func1_time/func2_time