I am trying to write a function to update all the outliers in all the columns in a dataset
with the interquartile
range. It is working when I pass a column as input but if I add another loop to iterate through all the columns its not working.
df2ColumnNames=df2.columns
def fixoutliers(x):
for i in df2ColumnNames:
print("colnames ",i)
xy=x[i]
print(xy)
updated=[]
Q1,Q3=np.percentile(xy,[25,75])
IQR=Q3-Q1
#print(IQR)
minimum=Q1-1.5*IQR
maximum=Q3+1.5*IQR
print("maximum",maximum)
for i in xy:
if(i>maximum):
i=maximum
updated.append(i)
elif(i<minimum):
i=minimum
updated.append(i)
else:
print("In else")
updated.append(i)
return updated