How can I get this to work while keeping entire rows intact?
I only want to replace the individual outlier cell values (scalars), not drop the rows that contain them.
Please help. My current code doesn't seem to have any effect.
import pandas as pd
# Sample frame: two ordinary incomes and one extreme value (42000).
# Goal: the 42000 cell should end up replaced by NaN, rows kept.
df = pd.DataFrame(
    data=[('Bob', 1), ('Jane', 1), ('Alice', 42000)],
    columns=['user', 'income'],
)
def remove_outliers(df: pd.DataFrame, threshold: float = 3.0) -> pd.DataFrame:
    """Replace outlier cells in every numeric column of *df* with NaN.

    A cell is an outlier when the absolute value of its z-score within its
    column is strictly greater than *threshold*.  Only the offending cells
    are blanked; rows and non-numeric columns are left untouched.

    Args:
        df: Frame to clean.  It is modified in place.
        threshold: Maximum allowed absolute z-score (in standard deviations).

    Returns:
        The same ``df`` object, for convenient chaining.

    Note:
        z-scores use the population standard deviation (``ddof=0``), so in a
        column with ``n`` values no |z| can exceed ``sqrt(n - 1)``.  With only
        three rows the largest possible |z| is about 1.41, so a threshold of
        3 can never flag anything -- use more data or a lower threshold.
    """
    import numpy as np
    from scipy import stats

    # `display` is only injected by IPython/Jupyter; fall back to print so the
    # function also works in a plain interpreter.
    try:
        from IPython.display import display
    except ImportError:
        display = print

    for col in df.select_dtypes(include=['number']).columns:
        values = df[col].to_numpy(dtype=float, na_value=np.nan)
        # nan_policy='omit' keeps existing NaNs from turning every z-score
        # in the column into NaN (which would hide all outliers).
        z_scores = np.asarray(stats.zscore(values, nan_policy='omit'), dtype=float)
        # Absolute value so that unusually low values are caught as well.
        is_outlier = np.abs(z_scores) > threshold
        display("Outliers found: " + str(int(is_outlier.sum())))
        if is_outlier.any():
            # Series.mask returns a new (float) column, avoiding the
            # deprecated in-place upcast of an integer column to hold NaN.
            df[col] = df[col].mask(is_outlier)
    display("Dataframe after outlier removal: ")
    display(df)
    return df
# Run the outlier replacement on the sample frame defined above.
remove_outliers(df)
It returns an identical df; none of the outliers are replaced.
Thank you.