Profiling 1:
When there are 100 connected components and the image size is 2000x2000, finding the centroids (`find_all_centroids`) is the slowest step. The whole program takes 28 seconds to run on a laptop.

from skimage import measure
from skimage import filters
import numpy as np
import cProfile
def make_blobs(size=256, n_blobs=12):
    """Build a synthetic boolean test image containing blob-like regions.

    ``n_blobs ** 2`` random pixels of a ``size`` x ``size`` zero image are set
    to 1, the image is smoothed with a Gaussian filter whose sigma is
    ``size / (4 * n_blobs)``, and the result is thresholded at 0.7 times its
    mean value.

    Args:
        size: Edge length of the square image, in pixels.
        n_blobs: Controls the number of seed points (``n_blobs ** 2``) and the
            smoothing width.

    Returns:
        A boolean array of shape ``(size, size)``; True where the smoothed
        image exceeds the threshold.
    """
    # Fixed seed so that every profiling run works on the same image.
    np.random.seed(1)
    im = np.zeros((size, size))
    points = size * np.random.random((2, n_blobs ** 2))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24+); the builtin gives the same dtype on every version.
    im[points[0].astype(int), points[1].astype(int)] = 1
    im = filters.gaussian(im, sigma=size / (4. * n_blobs))
    blobs = im > 0.7 * im.mean()
    return blobs
def faster_centroid(img):
    """Return the centroid of ``img`` as ``(axis-0 position, axis-1 position)``.

    Each coordinate is the mean index along that axis weighted by the values
    of ``img``; for a boolean mask this is the mean position of the True
    pixels. It is computed as ``mean(img * index) / mean(img)``.

    Args:
        img: Array with at least two axes (a 2-D mask in this file).

    Returns:
        A 2-tuple of floats. If ``img`` sums to zero the division by the mean
        is a division by zero and the result is not a finite centroid.
    """
    # Reciprocal of the mean turns the plain means below into weighted means.
    inv_mean = 1 / np.mean(img)
    dims = img.shape
    # Column vector of axis-0 indices and row vector of axis-1 indices, so
    # that each broadcasts against ``img`` along the matching axis.
    axis0_index = np.arange(dims[0])[:, np.newaxis]
    axis1_index = np.arange(dims[1])[np.newaxis, :]
    axis0_centre = np.mean(img * axis0_index) * inv_mean
    axis1_centre = np.mean(img * axis1_index) * inv_mean
    return axis0_centre, axis1_centre
def label_blobs(blobs):
    """Label the connected regions of ``blobs`` with ``skimage.measure.label``.

    The labelling is run twice: once with the library defaults and once with
    ``background=0`` passed explicitly.
    NOTE(review): the two calls presumably yield the same labels (0 appears to
    be the default background value) -- confirm against the scikit-image docs.

    Args:
        blobs: Image to label (a boolean blob mask in this file).

    Returns:
        A 2-tuple ``(default_labels, background0_labels)``.
    """
    return measure.label(blobs), measure.label(blobs, background=0)
def find_all_centroids(all_labels):
    """Compute one centroid per label value in ``all_labels``.

    Every integer from 0 up to and including the largest label is processed,
    so the entry at index 0 is the centroid of the pixels labelled 0.

    Args:
        all_labels: Integer label image.

    Returns:
        A list of ``faster_centroid`` results, indexed by label value.
    """
    highest_label = np.max(all_labels)
    # One full-image mask comparison per label value.
    return [
        faster_centroid(all_labels == label)
        for label in range(highest_label + 1)
    ]
def main():
    """Generate a 2000x2000 blob image, label it and print all centroids."""
    blob_image = make_blobs(2000, n_blobs=100)
    # Label connected regions of an integer array.
    # Only the first of the two label images is used below.
    labelled = label_blobs(blob_image)[0]
    print(labelled)
    print(find_all_centroids(labelled))
# Profile main() and write the statistics to "results.cprofile", but only when
# this file is executed as a script. cProfile.run evaluates its command string
# in the __main__ namespace, so the guard both avoids running the benchmark on
# import and guarantees that "main" resolves to the function defined here.
if __name__ == "__main__":
    cProfile.run("main()", "results.cprofile")
Profiling 2:
[['<function get_centroids1 at 0x7f0027ba6280>', 1.3774937389971456],
['<function get_centroids2 at 0x7f0027ba6310>', 2.308947408993845],
['<function get_centroids3 at 0x7f0027ba63a0>', 0.695534451995627]]
4.262 main red3.py:61
├─ 2.245 get_centroids2 red3.py:36
│ ├─ 1.258 [self]
│ └─ 0.954 mean <__array_function__ internals>:2
│ [5 frames hidden] <__array_function__ internals>, numpy...
│ 0.954 ufunc.reduce <built-in>:0
├─ 1.334 get_centroids1 red3.py:25
│ ├─ 1.031 where <__array_function__ internals>:2
│ │ [3 frames hidden] <__array_function__ internals>, <buil...
│ │ 1.031 implement_array_function <built-in>:0
│ ├─ 0.188 [self]
│ └─ 0.080 mean <__array_function__ internals>:2
│ [5 frames hidden] <__array_function__ internals>, numpy...
└─ 0.683 get_centroids3 red3.py:51
├─ 0.333 <dictcomp> red3.py:57
├─ 0.233 nonzero <__array_function__ internals>:2
│ [5 frames hidden] <__array_function__ internals>, numpy...
└─ 0.048 [self]
from skimage import measure
from skimage import filters
import numpy as np
#import cProfile
from pyinstrument import Profiler
import timeit
def make_blobs(size=256, n_blobs=12):
    """Build a synthetic boolean test image containing blob-like regions.

    ``n_blobs ** 2`` random pixels of a ``size`` x ``size`` zero image are set
    to 1, the image is smoothed with a Gaussian filter whose sigma is
    ``size / (4 * n_blobs)``, and the result is thresholded at 0.7 times its
    mean value.

    Args:
        size: Edge length of the square image, in pixels.
        n_blobs: Controls the number of seed points (``n_blobs ** 2``) and the
            smoothing width.

    Returns:
        A boolean array of shape ``(size, size)``; True where the smoothed
        image exceeds the threshold.
    """
    # Fixed seed so that every benchmark run works on the same image.
    np.random.seed(1)
    im = np.zeros((size, size))
    points = size * np.random.random((2, n_blobs ** 2))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24+); the builtin gives the same dtype on every version.
    im[points[0].astype(int), points[1].astype(int)] = 1
    im = filters.gaussian(im, sigma=size / (4. * n_blobs))
    blobs = im > 0.7 * im.mean()
    return blobs
def label_blobs(blobs):
    """Label the connected regions of ``blobs`` with ``skimage.measure.label``.

    The labelling is run twice: once with the library defaults and once with
    ``background=0`` passed explicitly.
    NOTE(review): the two calls presumably yield the same labels (0 appears to
    be the default background value) -- confirm against the scikit-image docs.

    Args:
        blobs: Image to label (a boolean blob mask in this file).

    Returns:
        A 2-tuple ``(default_labels, background0_labels)``.
    """
    return measure.label(blobs), measure.label(blobs, background=0)
def get_centroids1(all_labels):
    """Centroid per label value, from the index lists given by ``np.where``.

    Every integer from 0 up to and including the largest label is processed,
    so index 0 of the result describes the pixels labelled 0.

    Args:
        all_labels: Integer label image with at least two axes.

    Returns:
        A list with one ``[mean axis-1 index, mean axis-0 index]`` pair per
        label value, i.e. ``[x, y]`` with x along columns and y along rows.
    """
    label_count = np.max(all_labels) + 1
    centroids = []
    for label in range(label_count):
        # Tuple of index arrays, one per axis, of the pixels carrying `label`.
        hits = np.where(all_labels == label)
        centroids.append([np.mean(hits[1]), np.mean(hits[0])])
    return centroids
def get_centroids2(all_labels):
    """Centroid per label value, from weighted means over the full mask.

    For each label the boolean mask ``all_labels == label`` is multiplied by
    per-axis index arrays and the centroid is ``mean(mask * index) / mean(mask)``.
    Every integer from 0 up to and including the largest label is processed.

    NOTE(review): each entry is ordered ``[axis-0 mean, axis-1 mean]``, which
    is the reverse of the ``[axis-1, axis-0]`` order used by get_centroids1.

    Args:
        all_labels: Integer label image with at least two axes.

    Returns:
        A list with one two-element list per label value.
    """
    centroids = []
    for label in range(np.max(all_labels) + 1):
        mask = all_labels == label
        # Reciprocal of the covered fraction converts plain means into means
        # over the masked pixels only.
        inv_fraction = 1 / np.mean(mask)
        dims = mask.shape
        axis0_index = np.arange(dims[0])[:, np.newaxis]
        axis1_index = np.arange(dims[1])[np.newaxis, :]
        axis0_centre = np.mean(mask * axis0_index) * inv_fraction
        axis1_centre = np.mean(mask * axis1_index) * inv_fraction
        centroids.append([axis0_centre, axis1_centre])
    return centroids
def get_centroids3(x):
    """Group the coordinates of the non-zero pixels of ``x`` by label value.

    Despite its name this function does not average anything: it returns the
    raw pixel coordinates of every label, not one centroid per label.

    Args:
        x: 2-D integer label image; 0 is skipped because only non-zero pixels
            are collected.

    Returns:
        A dict mapping each label ``1 .. max(x)`` to an array of shape
        ``(n_pixels, 2)`` holding the ``(axis-0, axis-1)`` indices of the
        pixels with that label. The dict is empty when ``x`` has no non-zero
        label.
    """
    # https://stackoverflow.com/questions/32748950/
    max_label = np.max(x)
    nz = np.nonzero(x)
    coords = np.column_stack(nz)
    nzvals = x[nz]
    # Labels run from 1 to max_label inclusive. The previous upper bound of
    # max_label + 2 added an always-empty entry for a label that cannot occur.
    return {k: coords[nzvals == k] for k in range(1, max_label + 1)}
def main():
    """Time and profile the three centroid implementations on one image.

    Each function is called 10 times on the same label image; the last result
    is printed, the elapsed wall-clock time of the 10 calls is recorded, and
    finally the timings and the pyinstrument report are printed.
    """
    blob_image = make_blobs(2000, n_blobs=5)
    # Label connected regions of an integer array.
    # Only the first of the two label images is used below.
    labelled = label_blobs(blob_image)[0]

    profiler = Profiler()
    profiler.start()
    timings = []
    for func in [get_centroids1, get_centroids2, get_centroids3]:
        started = timeit.default_timer()
        for _ in range(10):
            result = func(labelled)
        elapsed = timeit.default_timer() - started
        print(result)
        timings.append([str(func), elapsed])
    print(timings)
    profiler.stop()
    print(profiler.output_text(unicode=True, color=True))
# Run the benchmark only when this file is executed as a script, so that
# importing it (e.g. to reuse one of the centroid functions) has no side effects.
if __name__ == "__main__":
    main()