You can use the binning method from Most efficient way to sort an array into bins specified by an index array?
To get an index array from y,x
coordinates you can use np.searchsorted
and np.ravel_multi_index
Here is a sample implementation, stb module is the code from the linked post.
import numpy as np
from stb import sort_to_bins_sparse as sort_to_bins
def grid1D(u, N):
mn, mx = u.min(), u.max()
return np.linspace(mn, mx, N, endpoint=False)
def gridify(yxz, N):
try:
Ny, Nx = N
except TypeError:
Ny = Nx = N
y, x, z = yxz.T
yg, xg = grid1D(y, Ny), grid1D(x, Nx)
yidx, xidx = yg.searchsorted(y, 'right')-1, xg.searchsorted(x, 'right')-1
yx = np.ravel_multi_index((yidx, xidx), (Ny, Nx))
zs = sort_to_bins(yx, z)
return np.concatenate([[0], np.bincount(yx).cumsum()]), zs, yg, xg
def bin(yxz, N, binning_method='min'):
boundaries, binned, yg, xg = gridify(yxz, N)
result = np.full((yg.size, xg.size), np.nan)
if binning_method == 'min':
result.reshape(-1)[:len(boundaries)-1] = np.minimum.reduceat(binned, boundaries[:-1])
elif binning_method == 'max':
result.reshape(-1)[:len(boundaries)-1] = np.maximum.reduceat(binned, boundaries[:-1])
elif binning_method == 'mean':
result.reshape(-1)[:len(boundaries)-1] = np.add.reduceat(binned, boundaries[:-1]) / np.diff(boundaries)
else:
raise ValueError
result.reshape(-1)[np.where(boundaries[1:] == boundaries[:-1])] = np.nan
return result
def test():
yxz = np.random.uniform(0, 100, (100000, 3))
N = 20
boundaries, binned, yg, xg = gridify(yxz, N)
binmin = bin(yxz, N)
binmean = bin(yxz, N, 'mean')
y, x, z = yxz.T
for i in range(N-1):
for j in range(N-1):
msk = (y>=yg[i]) & (y<yg[i+1]) & (x>=xg[j]) & (x<xg[j+1])
assert (z[msk].min() == binmin[i, j]) if msk.any() else np.isnan(binmin[i, j])
assert np.isclose(z[msk].mean(), binmean[i, j]) if msk.any() else np.isnan(binmean[i, j])