0

The following justify function should move the data fown in the rows, if there is a NaN value (so that the NaN will be at the top. But somehow the "down" option does the same as the "right". How to fix?

Here is the reproducable data (its a 3d array):

import numpy as np
def justify(a, invalid_val=0, axis=1, side='left'):    
    """  Justifies a 2D array 
    Parameters
    ----------
    A : ndarray
        Input array to be justified
    axis : int
        Axis along which justification is to be made
    side : str
        Direction of justification. It could be 'left', 'right', 'up', 'down'
        It should be 'left' or 'right' for axis=1 and 'up' or 'down' for axis=0. 
    """ 
    if invalid_val is np.nan:
        mask = ~np.isnan(a)
    else:
        mask = a!=invalid_val
    justified_mask = np.sort(mask,axis=axis)
    if (side=='up') | (side=='left'):
        justified_mask = np.flip(justified_mask,axis=axis)
    out = np.full(a.shape, invalid_val) 
    if axis==1:
        out[justified_mask] = a[mask]
    else:
        out.T[justified_mask.T] = a.T[mask.T]
    return out

a = np.asarray([
 np.asarray([np.asarray([ 1.21643707,  0.9280912,   5.20711915]),
  np.asarray([-2.01148217,  2.72869681,  2.54161257]),
  np.asarray([ 0.49170286,  0.72304396,  1.56706948]),
  np.asarray([-0.77553082,  2.74300372,  0.10107189]),
  np.asarray([ 2.54368976,  0.53107898,  0.09351025]),
  np.asarray([ 1.03176737, -0.54742843,      np.nan])])])


b = justify(a, invalid_val=np.nan, axis=1, side='down')
print(a) 
print(b)

real output:

#a
[[[ 1.21643707  0.9280912   5.20711915]
  [-2.01148217  2.72869681  2.54161257]
  [ 0.49170286  0.72304396  1.56706948]
  [-0.77553082  2.74300372  0.10107189]
  [ 2.54368976  0.53107898  0.09351025]
  [ 1.03176737 -0.54742843         nan]]]
_
#b
[[[ 1.21643707  0.9280912          nan]
  [ 5.20711915 -2.01148217  2.72869681]
  [ 2.54161257  0.49170286  0.72304396]
  [ 1.56706948 -0.77553082  2.74300372]
  [ 0.10107189  2.54368976  0.53107898]
  [ 0.09351025  1.03176737 -0.54742843]]]

actually expected output:

#a
[[[ 1.21643707  0.9280912   5.20711915]
  [-2.01148217  2.72869681  2.54161257]
  [ 0.49170286  0.72304396  1.56706948]
  [-0.77553082  2.74300372  0.10107189]
  [ 2.54368976  0.53107898  0.09351025]
  [ 1.03176737 -0.54742843         nan]]]
_
# expected b
[[[ 1.21643707  0.9280912          nan]
  [-2.01148217  2.72869681  5.20711915]
  [ 0.49170286  0.72304396  2.54161257]
  [-0.77553082  2.74300372  1.56706948]
  [ 2.54368976  0.53107898  0.10107189]
  [ 1.03176737 -0.54742843  0.09351025]]]

So the 'real' output is the same as the input side="right". Why is that happening, or better is there a way to fix it?

EDIT: unconsciously wrote "my", i didnt mean its mine, sry. (credit: Divakar)

EDIT2: more detailled sample (3d):

# in reality were dealing with about 1million samples (==2d matrices) of 50 'columns' and 10.000 'rows'

samples = np.asarray([np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,np.nan,52.329743,np.nan,2.319787])
]),
np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,np.nan,np.nan,52.329743,np.nan,2.319787])
]),
np.asarray([
np.asarray([89.319787,1.329743,99.234670,52.329743,0.319787,2.319787]),
np.asarray([np.nan,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,np.nan,33.329743,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,23.329743,52.329743,0.319787,2.319787]),
np.asarray([23.319787,1.329743,45.234670,52.329743,0.32721,2.319787]),
np.asarray([89.319787,np.nan,99.234670,np.nan,np.nan,2.319787]),
np.asarray([84.319787,1.329743,49.329743,52.329743,0.319,2.319787]),
np.asarray([12.319787,1.329743,np.nan,52.329743,0.319787,2.319787]),
np.asarray([33.319787,1.329743,np.nan,52.329743,np.nan,2.319787])])])

0 Answers0