I try to remove outliers in a python list. But it removes only the first one (190000) and not the second (20000). What is the problem ?
import statistics
dataset = [25000, 30000, 52000, 28000, 150000, 190000, 200000]
def detect_outlier(data_1):
threshold = 1
mean_1 = statistics.mean(data_1)
std_1 = statistics.stdev(data_1)
#print(std_1)
for y in data_1:
z_score = (y - mean_1)/std_1
print(z_score)
if abs(z_score) > threshold:
dataset.remove(y)
return dataset
dataset = detect_outlier(dataset)
print(dataset)
Output:
[25000, 30000, 52000, 28000, 150000, 200000]