0

I have a list of 3d points such as

np.array([
    [220, 114, 2000],
    [125.24, 214, 2519],
    ...
    [54.1, 254, 1249]
])

The points are in no meaningful order. I'd like to sort and reshape the array in a way that better represents a coordinate grid (such that I have a known width and height and can retrieve Z values by index). I would also like to down sample the points into say whole integers to handle collisions. Applying min,max, or mean during the down sampling.

I know I can down sample a 1d array using np.mean and np.shape

The approach I'm currently using finds the min and max in X,Y and then puts the Z values into a 2d array while doing the down sampling manually.

This iterates the giant array numerous times and I'm wondering if there is a way to do this with np.meshgrid or some other numpy functionality that I'm overlooking.

Thanks

jdavison
  • 322
  • 1
  • 9

1 Answers1

0

You can use the binning method from Most efficient way to sort an array into bins specified by an index array? To get an index array from y,x coordinates you can use np.searchsorted and np.ravel_multi_index

Here is a sample implementation, stb module is the code from the linked post.

import numpy as np
from stb import sort_to_bins_sparse as sort_to_bins

def grid1D(u, N):
    mn, mx = u.min(), u.max()
    return np.linspace(mn, mx, N, endpoint=False)

def gridify(yxz, N):
    try:
        Ny, Nx = N
    except TypeError:
        Ny = Nx = N
    y, x, z = yxz.T
    yg, xg = grid1D(y, Ny), grid1D(x, Nx)
    yidx, xidx = yg.searchsorted(y, 'right')-1, xg.searchsorted(x, 'right')-1
    yx = np.ravel_multi_index((yidx, xidx), (Ny, Nx))
    zs = sort_to_bins(yx, z)
    return np.concatenate([[0], np.bincount(yx).cumsum()]), zs, yg, xg

def bin(yxz, N, binning_method='min'):
    boundaries, binned, yg, xg = gridify(yxz, N)
    result = np.full((yg.size, xg.size), np.nan)
    if binning_method == 'min':
        result.reshape(-1)[:len(boundaries)-1] = np.minimum.reduceat(binned, boundaries[:-1])
    elif binning_method == 'max':
        result.reshape(-1)[:len(boundaries)-1] = np.maximum.reduceat(binned, boundaries[:-1])
    elif binning_method == 'mean':
        result.reshape(-1)[:len(boundaries)-1] = np.add.reduceat(binned, boundaries[:-1]) / np.diff(boundaries)
    else:
        raise ValueError
    result.reshape(-1)[np.where(boundaries[1:] == boundaries[:-1])] = np.nan
    return result

def test():
    yxz = np.random.uniform(0, 100, (100000, 3))
    N = 20
    boundaries, binned, yg, xg = gridify(yxz, N)
    binmin = bin(yxz, N)
    binmean = bin(yxz, N, 'mean')
    y, x, z = yxz.T
    for i in range(N-1):
        for j in range(N-1):
            msk = (y>=yg[i]) & (y<yg[i+1]) & (x>=xg[j]) & (x<xg[j+1])
            assert (z[msk].min() == binmin[i, j]) if msk.any() else np.isnan(binmin[i, j])
            assert np.isclose(z[msk].mean(), binmean[i, j]) if msk.any() else np.isnan(binmean[i, j])
Paul Panzer
  • 51,835
  • 3
  • 54
  • 99