You can use np.unique
with a bit of trickery:
import numpy as np
def safe_method(image, k):
# a bit of black magic to make np.unique handle triplets
out = np.zeros(image.shape[:-1], dtype=np.int32)
out8 = out.view(np.int8)
# should really check endianness here
out8.reshape(image.shape[:-1] + (4,))[..., 1:] = image
uniq, map_ = np.unique(out, return_inverse=True)
assert uniq.size == k
map_.shape = image.shape[:-1]
# map_ contains the desired result. However, order of colours is most
# probably different from original
colours = uniq.view(np.uint8).reshape(-1, 4)[:, 1:]
return colours, map_
However, if the number of pixels is much larger than the number of colours,
the following heuristic algorithm may deliver huge speedups.
It tries to find a cheap hash function (such as only looking at the red channel) and if it succeds it uses that to create a lookup table. If not it falls back to the above safe method.
CHEAP_HASHES = [lambda x: x[..., 0], lambda x: x[..., 1], lambda x: x[..., 2]]
def fast_method(image, k):
# find all colours
chunk = int(4 * k * np.log(k)) + 1
colours = set()
for chunk_start in range(0, image.size // 3, chunk):
colours |= set(
map(tuple, image.reshape(-1,3)[chunk_start:chunk_start+chunk]))
if len(colours) == k:
break
colours = np.array(sorted(colours))
# find hash method
for method in CHEAP_HASHES:
if len(set(method(colours))) == k:
break
else:
safe_method(image, k)
# create lookup table
hashed = method(colours)
# should really provide for unexpected colours here
lookup = np.empty((hashed.max() + 1,), int)
lookup[hashed] = np.arange(k)
return colours, lookup[method(image)]
Testing and timings:
from timeit import timeit
def create_image(k, M, N):
colours = np.random.randint(0, 256, (k, 3)).astype(np.uint8)
map_ = np.random.randint(0, k, (M, N))
image = colours[map_, :]
return colours, map_, image
k, M, N = 12, 1000, 1000
colours, map_, image = create_image(k, M, N)
for f in fast_method, safe_method:
print('{:16s} {:10.6f} ms'.format(f.__name__, timeit(
lambda: f(image, k), number=10)*100))
rec_colours, rec_map_ = f(image, k)
print('solution correct:', np.all(rec_colours[rec_map_, :] == image))
Sample output (12 colours, 1000x1000 pixels):
fast_method 3.425885 ms
solution correct: True
safe_method 73.622813 ms
solution correct: True