I need to quickly process a huge two-dimensional array, and I have already marked the required data with a boolean mask of the same shape.
array([[ 0., 1., 2., 3., 4., 5. , 6. , 7.],
[ 6., 7., 8., 9., 10., 4.2, 4.3, 11.],
[ 12., 13., 14., 15., 16., 4.2, 4.3, 17.],
[ 18., 19., 20., 21., 22., 4.2, 4.3, 23.]])
array([[False, True, True, True, False, True, True , False],
[False, False, False, True, True, True, True , False],
[False, False, True, True, False, False, False, False],
[False, True, True, False, False, False, True , True ]])
I want to compute the cumulative sum of the marked data in each row of the array. However, np.cumsum alone can't do this. I need solutions or good ideas, thanks.
Expected output:
array([[ 0., 1., 3., 6., 0., 5. , 11. , 0.],
[ 0., 0., 0., 9., 19., 23.2, 27.5, 0.],
[ 0., 0., 14., 29., 0., 0, 0, 0.],
[ 0., 19., 39., 0., 0., 0, 4.3, 27.3]])
The difficulty is that the cumulative sum of each fragment (a run of consecutive marked values) must not include the result of the previous fragment.
def mask_to_size(self, axis=-1):
    """Return the lengths of the runs of consecutive True values in a mask.

    Parameters
    ----------
    self : array_like of bool
        The mask, 1-D or 2-D. (The parameter is named ``self`` for
        backward compatibility; this is a plain function.)
    axis : int, optional
        Only used for 2-D masks. ``0`` scans down each column, visiting the
        columns in order; any other value scans along each row, visiting
        the rows in order.

    Returns
    -------
    numpy.ndarray of int
        One entry per run of True values, in scan order. Runs never extend
        across a row (or column) boundary.
    """
    mask = np.asarray(self, dtype=bool)

    # Pad every scanned line with one False cell on BOTH sides. This keeps a
    # run that touches the first or last position intact, prevents runs from
    # merging across neighbouring lines, and guarantees the flattened array
    # starts and ends with False so the edges below alternate start/end.
    if mask.ndim == 2:
        if axis == 0:
            padded = np.zeros((mask.shape[0] + 2, mask.shape[1]), dtype=bool)
            padded[1:-1] = mask
            flat = padded.ravel('F')
        else:
            padded = np.zeros((mask.shape[0], mask.shape[1] + 2), dtype=bool)
            padded[:, 1:-1] = mask
            flat = padded.ravel('C')
    else:
        flat = np.zeros(mask.shape[0] + 2, dtype=bool)
        flat[1:-1] = mask

    # Positions where the value flips: start0, end0, start1, end1, ...
    edges = np.nonzero(flat[1:] != flat[:-1])[0]
    # Every other difference (end - start) is the length of one run.
    return np.diff(edges)[::2].astype(int)
# https://stackoverflow.com/a/49179628/ by @Divakar
def intervaled_cumsum(ar, sizes):
    """Cumulatively sum ``ar`` inside consecutive intervals, restarting at each.

    Parameters
    ----------
    ar : numpy.ndarray
        1-D array of values.
    sizes : array_like of int
        Lengths of the consecutive intervals that partition ``ar``.
        NOTE(review): the lengths are assumed to be positive and to add up
        to ``len(ar)``; this is not validated here.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``ar`` where each interval holds its own
        cumulative sum, independent of the preceding intervals.
    """
    sizes = np.asarray(sizes)

    # With zero or one interval there is no restart point, so the result is
    # the ordinary cumulative sum. (Indexing the first restart position would
    # otherwise run past the end of ``ar``.)
    if sizes.size < 2:
        return ar.cumsum()

    out = ar.copy()
    running = ar.cumsum()
    # First index of every interval except the very first one.
    starts = sizes.cumsum()[:-1]

    # At each restart position store the value minus the sum of the previous
    # interval, so that the final cumsum cancels everything accumulated
    # before that position.
    out[starts[0]] = ar[starts[0]] - running[starts[0] - 1]
    out[starts[1:]] = ar[starts[1:]] - np.diff(running[starts - 1])
    return out.cumsum()
def cumsum_masked(self, mask, axis=-1):
    """Cumulative sum over the masked elements, restarting at every run.

    Parameters
    ----------
    self : numpy.ndarray
        The data array. (Named ``self`` although this is a plain function.)
    mask : numpy.ndarray of bool
        Marks the elements to accumulate; used to index the flattened data.
    axis : int, optional
        For 2-D input, ``0`` accumulates down the columns and any other
        value accumulates along the rows.

    Returns
    -------
    numpy.ndarray
        Float array holding the per-run cumulative sums at the masked
        positions and 0 everywhere else. 2-D input yields a result with the
        input's shape; otherwise a flat array of ``self.size`` elements.
    """
    run_lengths = mask_to_size(mask, axis)
    out = np.zeros(self.size)
    shape = self.shape
    two_dim = len(shape) == 2

    if two_dim:
        # Flatten so that the elements along the chosen axis become adjacent:
        # column-major for axis 0, row-major otherwise.
        order = 'F' if axis == 0 else 'C'
        mask = mask.ravel(order)
        self = self.ravel(order)

    out[mask] = intervaled_cumsum(self[mask], run_lengths)

    if not two_dim:
        return out
    if axis == 0:
        # Undo the column-major flattening.
        return out.reshape(shape[1], shape[0]).T
    return out.reshape(shape)
# Example call accumulating along each row. `a` and `m` are not defined in this
# snippet; presumably they are the data array and the boolean marker array
# printed at the top (TODO confirm).
cumsum_masked(a,m,axis=1)
I consolidated the answers and tried to optimize the speed, but that didn't help. I think other people may need this.