I have a function to calculate the log gamma function that I am decorating with numba.njit
.
import numpy as np
from numpy import log
from scipy.special import gammaln
from numba import njit
coefs = np.array([
57.1562356658629235, -59.5979603554754912,
14.1360979747417471, -0.491913816097620199,
.339946499848118887e-4, .465236289270485756e-4,
-.983744753048795646e-4, .158088703224912494e-3,
-.210264441724104883e-3, .217439618115212643e-3,
-.164318106536763890e-3, .844182239838527433e-4,
-.261908384015814087e-4, .368991826595316234e-5
])
@njit(fastmath=True)
def gammaln_nr(z):
"""Numerical Recipes 6.1"""
y = z
tmp = z + 5.24218750000000000
tmp = (z + 0.5) * log(tmp) - tmp
ser = np.ones_like(y) * 0.999999999999997092
n = coefs.shape[0]
for j in range(n):
y = y + 1
ser = ser + coefs[j] / y
out = tmp + log(2.5066282746310005 * ser / z)
return out
When I use gammaln_nr
for a large array, say np.linspace(0.001, 100, 10**7)
, my run time is about 7X slower than scipy (see code in appendix below). However, if I run for any individual value, my numba function is always about 2X faster. How is this happening?
z = 11.67
%timeit gammaln_nr(z)
%timeit gammaln(z)
>>> 470 ns ± 29.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
>>> 1.22 µs ± 28.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
My intuition is that if my function is faster for one value, it should be faster for an array of values. Of course, this may not be the case because I don't know whether numba is using SIMD instructions or some other sort of vectorization, whereas scipy may be.
Appendix
import matplotlib.pyplot as plt
import seaborn as sns
n_trials = 8
scipy_times = np.zeros(n_trials)
fastats_times = np.zeros(n_trials)
for i in range(n_trials):
zs = np.linspace(0.001, 100, 10**i) # evaluate gammaln over this range
# dont take first timing - this is just compilation
start = time.time()
gammaln_nr(zs)
end = time.time()
start = time.time()
gammaln_nr(zs)
end = time.time()
fastats_times[i] = end - start
start = time.time()
gammaln(zs)
end = time.time()
scipy_times[i] = end - start
fig, ax = plt.subplots(figsize=(12,8))
sns.lineplot(np.logspace(0, n_trials-1, n_trials), fastats_times, label="numba");
sns.lineplot(np.logspace(0, n_trials-1, n_trials), scipy_times, label="scipy");
ax.set(xscale="log");
ax.set_xlabel("Array Size", fontsize=15);
ax.set_ylabel("Execution Time (s)", fontsize=15);
ax.set_title("Execution Time of Log Gamma");