I have 2 versions of a function that appends a row to a 2d array; one in Cython and another in Numba.
The Cython version is a lot slower than the Numba version. I would like to optimise the Cython version so that it performs at least as well as the Numba version.
I am timing the code with this timer.py
module:
import time
class Timer(object):
    """Context manager that times its ``with`` body and reports the result.

    On exit, a message of the form ``'<name> Took <seconds>s seconds'`` is
    passed to ``output``.

    Attributes set while the timer is used:
        start: wall-clock timestamp (``time.time()``) taken on entry.
        end: wall-clock timestamp (``time.time()``) taken on exit.
        time_taken: elapsed seconds between entry and exit, measured with
            ``time.perf_counter()``.

    Args:
        name: label placed at the start of the reported message.
        output: callable that receives the formatted message (default:
            ``print``).
    """

    def __init__(self, name='', output=print):
        self._name = name
        self._output = output

    def __enter__(self):
        """Record the start time and return the timer itself."""
        self.start = time.time()
        # Read the high-resolution clock last so that the bookkeeping above
        # is not included in the measured interval.
        self._perf_start = time.perf_counter()
        return self

    def __exit__(self, a, b, c):
        """Record the end time, compute ``time_taken`` and report it.

        ``a``, ``b`` and ``c`` are the exception type, value and traceback
        supplied by the ``with`` statement. They are ignored, and ``None`` is
        returned, so an exception raised in the body still propagates.
        """
        # Read the high-resolution clock first, for the same reason as above.
        elapsed = time.perf_counter() - self._perf_start
        self.end = time.time()
        # perf_counter() is monotonic and high resolution, unlike the wall
        # clock, which can jump if the system time is adjusted.
        self.time_taken = elapsed
        self._output('%s Took %0.2fs seconds' % (self._name, self.time_taken))
My append_2d_cython.pyx
module is:
#!python
#cython: boundscheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as cnp
cnp.import_array() # needed to initialize numpy-API
cpdef empty_2d(int d1, int d2):
    """Return a new ``d1`` x ``d2`` NumPy array of 32-bit integers.

    The array is created with ``PyArray_SimpleNew``, so its contents are
    not initialised.
    """
    # PyArray_SimpleNew expects the shape as a C array of npy_intp.
    cdef cnp.npy_intp[2] dims
    dims[0] = d1
    dims[1] = d2
    return cnp.PyArray_SimpleNew(2, dims, cnp.NPY_INT32)
cpdef append_2d(int[:, :] arr, int[:] value):
    """Return a copy of ``arr`` with ``value`` added as an extra last row.

    A new array with one more row than ``arr`` is allocated, the rows of
    ``arr`` are copied into it and ``value`` is written into the final row.
    The result is returned through an ``int[:, :]`` memoryview.
    """
    cdef Py_ssize_t n_rows = arr.shape[0]
    cdef int[:, :] out = empty_2d(n_rows + 1, arr.shape[1])
    # Existing rows first, then the appended row.
    out[:n_rows, :] = arr
    out[n_rows, :] = value
    return out
My append_2d_numba.py
module is:
import numba as nb
import numpy as np
@nb.jit(nopython=True)
def append_2d(arr, value):
    """Return a new array holding the rows of ``arr`` followed by ``value``.

    The result has one more row than ``arr`` and the same dtype.
    """
    n_rows = arr.shape[0]
    out = np.empty((n_rows + 1, arr.shape[1]), dtype=arr.dtype)
    # Existing rows first, then the appended row at index n_rows.
    out[:n_rows] = arr
    out[n_rows] = value
    return out
I am comparing the Numba and Cython versions of append_2d
with this script:
import pyximport
import numpy as np
pyximport.install(setup_args={'include_dirs': np.get_include()})
from timer import Timer
from append_2d_cython import append_2d as append_2d_cython
from append_2d_numba import append_2d as append_2d_numba
# Benchmark inputs: a 5x4 int32 matrix and the int32 row appended to it.
arr_2d = np.random.randint(low=0, high=100, size=(5, 4), dtype=np.int32)
arr_1d = np.arange(4, dtype=np.int32)
num_tests = 100000

# Time the Cython implementation.
with Timer('append_2d_cython'):
    for _ in range(num_tests):
        r_cython = append_2d_cython(arr_2d, arr_1d)

# Call the Numba version once outside the timed region so that JIT
# compilation is not included in the measurement below.
append_2d_numba(arr_2d, arr_1d)

# Time the Numba implementation.
with Timer('append_2d_numba'):
    for _ in range(num_tests):
        r_numba = append_2d_numba(arr_2d, arr_1d)
Which prints:
make many with cython Took 0.36s seconds
make many with numba Took 0.12s seconds
So, for this code, Numba is 3 times faster than Cython. I would like to refactor the Cython code to be at least as fast as the Numba code. How can I do that?