It's important to separate aesthetics from performance. Sometimes ugly code is
fast. In fact, that's the case here. Although creating an empty array and then
assigning values to slices may not look beautiful, it is fast.
import numpy as np
import timeit
import itertools as IT
import pandas as pd
def using_empty(x):
x = np.asarray(x)
result = np.empty(x.shape + (2,))
result[..., 0] = x
result[..., 1] = np.log(x)
return result
def using_concat(x):
x = np.asarray(x)
return np.concatenate([x, np.log(x)], axis=-1).reshape(x.shape+(2,), order='F')
def using_stack(x):
x = np.asarray(x)
return np.stack([x, np.log(x)], axis=x.ndim)
def using_ufunc(x):
return np.array([x, np.log(x)])
using_ufunc = np.vectorize(using_ufunc, otypes=[np.ndarray])
tests = [np.arange(600),
np.arange(600).reshape(20,30),
np.arange(960).reshape(8,15,8)]
# check that all implementations return the same result
for x in tests:
assert np.allclose(using_empty(x), using_concat(x))
assert np.allclose(using_empty(x), using_stack(x))
timing = []
funcs = ['using_empty', 'using_concat', 'using_stack', 'using_ufunc']
for test, func in IT.product(tests, funcs):
timing.append(timeit.timeit(
'{}(test)'.format(func),
setup='from __main__ import test, {}'.format(func), number=1000))
timing = pd.DataFrame(np.array(timing).reshape(-1, len(funcs)), columns=funcs)
print(timing)
yields, the following timeit results on my machine:
using_empty using_concat using_stack using_ufunc
0 0.024754 0.025182 0.030244 2.414580
1 0.025766 0.027692 0.031970 2.408344
2 0.037502 0.039644 0.044032 3.907487
So using_empty
is the fastest (of the options tested applied to tests
).
Note that np.stack
does exactly what you want, so
np.stack([x, np.log(x)], axis=x.ndim)
looks reasonably pretty, but it is also the slowest of the three options tested.
Note that along with being much slower, using_ufunc
returns an array of object dtype:
In [236]: x = np.arange(6)
In [237]: using_ufunc(x)
Out[237]:
array([array([ 0., -inf]), array([ 1., 0.]),
array([ 2. , 0.69314718]),
array([ 3. , 1.09861229]),
array([ 4. , 1.38629436]), array([ 5. , 1.60943791])], dtype=object)
which is not the same as the desired result:
In [240]: using_empty(x)
Out[240]:
array([[ 0. , -inf],
[ 1. , 0. ],
[ 2. , 0.69314718],
[ 3. , 1.09861229],
[ 4. , 1.38629436],
[ 5. , 1.60943791]])
In [238]: using_ufunc(x).shape
Out[238]: (6,)
In [239]: using_empty(x).shape
Out[239]: (6, 2)