The following justify function should move the data fown in the rows, if there is a NaN value (so that the NaN will be at the top. But somehow the "down" option does the same as the "right". How to fix?
Here is the reproducable data (its a 3d array):
import numpy as np
def justify(a, invalid_val=0, axis=1, side='left'):
""" Justifies a 2D array
Parameters
----------
A : ndarray
Input array to be justified
axis : int
Axis along which justification is to be made
side : str
Direction of justification. It could be 'left', 'right', 'up', 'down'
It should be 'left' or 'right' for axis=1 and 'up' or 'down' for axis=0.
"""
if invalid_val is np.nan:
mask = ~np.isnan(a)
else:
mask = a!=invalid_val
justified_mask = np.sort(mask,axis=axis)
if (side=='up') | (side=='left'):
justified_mask = np.flip(justified_mask,axis=axis)
out = np.full(a.shape, invalid_val)
if axis==1:
out[justified_mask] = a[mask]
else:
out.T[justified_mask.T] = a.T[mask.T]
return out
a = np.asarray([
np.asarray([np.asarray([ 1.21643707, 0.9280912, 5.20711915]),
np.asarray([-2.01148217, 2.72869681, 2.54161257]),
np.asarray([ 0.49170286, 0.72304396, 1.56706948]),
np.asarray([-0.77553082, 2.74300372, 0.10107189]),
np.asarray([ 2.54368976, 0.53107898, 0.09351025]),
np.asarray([ 1.03176737, -0.54742843, np.nan])])])
b = justify(a, invalid_val=np.nan, axis=1, side='down')
print(a)
print(b)
real output:
#a
[[[ 1.21643707 0.9280912 5.20711915]
[-2.01148217 2.72869681 2.54161257]
[ 0.49170286 0.72304396 1.56706948]
[-0.77553082 2.74300372 0.10107189]
[ 2.54368976 0.53107898 0.09351025]
[ 1.03176737 -0.54742843 nan]]]
_
#b
[[[ 1.21643707 0.9280912 nan]
[ 5.20711915 -2.01148217 2.72869681]
[ 2.54161257 0.49170286 0.72304396]
[ 1.56706948 -0.77553082 2.74300372]
[ 0.10107189 2.54368976 0.53107898]
[ 0.09351025 1.03176737 -0.54742843]]]
actually expected output:
#a
[[[ 1.21643707 0.9280912 5.20711915]
[-2.01148217 2.72869681 2.54161257]
[ 0.49170286 0.72304396 1.56706948]
[-0.77553082 2.74300372 0.10107189]
[ 2.54368976 0.53107898 0.09351025]
[ 1.03176737 -0.54742843 nan]]]
_
# expected b
[[[ 1.21643707 0.9280912 nan]
[-2.01148217 2.72869681 5.20711915]
[ 0.49170286 0.72304396 2.54161257]
[-0.77553082 2.74300372 1.56706948]
[ 2.54368976 0.53107898 0.10107189]
[ 1.03176737 -0.54742843 0.09351025]]]
So the 'real' output is the same as the input side="right". Why is that happening, or better is there a way to fix it?
EDIT: unconsciously wrote "my", i didnt mean its mine, sry. (credit: Divakar)
EDIT2: more detailled sample (3d):
# in reality were dealing with about 1million samples (==2d matrices) of 50 'columns' and 10.000 'rows'
samples = np.asarray([np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,np.nan,52.329743,np.nan,2.319787])
]),
np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,np.nan,np.nan,52.329743,np.nan,2.319787])
]),
np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([np.nan,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,np.nan,52.329743,np.nan,2.319787])])])