I know this is an old question, but I was recently working on a similar thing where performance is critical and so I experimented extensively with timing. I hope my findings will be beneficial to the community.
Jaime's solution based on np.unique
is the fastest algorithm possible in Python, but with one caveat: the indices are not ordered (because numpy uses quicksort
by default) and the result is different from OP's original algorithm (called hereafter naive). Using the stable
option fixes it, but it slows things down a bit.
The naive method can be improved using Python's built-in array
module like this:
import array
from collections import defaultdict

# Reinterpret the numpy array's raw buffer as a C-level array of unsigned
# longs ("L"), then group each position under the value found there.
# NOTE(review): "L" must match the numpy dtype's item size (typically 8 bytes
# on 64-bit Unix, 4 on Windows) — confirm against a.dtype before reusing.
a = np.array(...) # 1D, int array
d = defaultdict(lambda: array.array("L"))  # value -> array of indices
alist = array.array("L")
alist.frombytes(a.tobytes())  # bulk copy of the raw buffer, no per-element boxing
for n in range(len(alist)):
    d[alist[n]].append(n)  # indices are appended in ascending order
It's only a fraction slower than Jaime's solution with stable sort.
Here's some testing done on my platform with Python 3
Best of 5
Naive method: 0.21274029999999988 s
Naive improved: 0.13265090000000002 s
Unique quick: 0.073496 s
Unique stable: 0.1235801999999997 s
The results from the naive method, naive improved, and unique stable are dictionaries with sorted lists of indices. Indices from unique quick are not sorted.
The benchmark code
import array
import timeit
from collections import defaultdict
import numpy as np
def count_naive(a):
    """Group the positions of each value in *a*.

    Returns a plain dict mapping value -> list of indices (in ascending
    order) at which that value occurs.
    """
    positions = {}
    for idx, value in enumerate(a):
        positions.setdefault(value, []).append(idx)
    return positions
def count_improved(a):
    """Group positions of each value in the 1-D integer numpy array *a*.

    Stages the data in an ``array.array("L")`` (raw-buffer copy, no
    per-element boxing) before looping, which is faster than iterating
    the numpy array directly.  Returns value -> sorted list of indices.
    """
    grouped = defaultdict(lambda: array.array("L"))
    staged = array.array("L")
    staged.frombytes(a.tobytes())  # reinterpret the raw buffer as C unsigned longs
    for position, value in enumerate(staged):
        grouped[value].append(position)
    return {value: idx_arr.tolist() for value, idx_arr in grouped.items()}
def count_unique(a):
sorted_idx = np.argsort(a) # , kind='stable')
counts = np.bincount(a)
split_idx = np.split(sorted_idx, np.cumsum(counts[:-1]))
return {n: indices.tolist() for n, indices in enumerate(split_idx)}
def count_stable(a):
    """Group indices by value via a *stable* argsort.

    Identical to count_unique except each index list is guaranteed to be
    in ascending order (stable sort preserves the original positions of
    equal elements).  Keys run over 0..a.max().
    """
    order = np.argsort(a, kind="stable")
    split_points = np.cumsum(np.bincount(a)[:-1])
    pieces = np.split(order, split_points)
    return {value: piece.tolist() for value, piece in enumerate(pieces)}
# Benchmark: 1M random ints in [0, 1000); best-of-5, one call per timing.
a = np.random.randint(1000, size=1000000)
trials = 5

# (report label, statement to time) — statements look up `a` via globals().
benchmarks = [
    ("Naive method", "count_naive(a)"),
    ("Naive improved", "count_improved(a)"),
    ("Unique quick", "count_unique(a)"),
    ("Unique stable", "count_stable(a)"),
]

print(f"Best of {trials}")
for label, stmt in benchmarks:
    timings = timeit.repeat(stmt, globals=globals(), repeat=trials, number=1)
    print(f"{label}: {min(timings)} s")
N.B. All functions are written so that they return Dict[int, list]
, making the results directly comparable.