I have a project about anomaly detection. I have already identified the predicted values. What I want now is to make the status of the energy consumption based on the predicted values.
These are the basis of the labels:
This is the result that I want:
I have tried the following but I always get an error:
def status(x):
if df_an['powerconsumption'] <= (df_an['predicted_values'] + (1*df_an['predicted_values'].std())):
return "Normal"
elif df_an['powerconsumption'] >= (df_an['predicted_values'] - (1*df_an['predicted_values'].std())):
return "Normal"
elif df_an['powerconsumption'] <= (df_an['predicted_values'] + (2*df_an['predicted_values'].std())):
return "Above Normal"
elif df_an['powerconsumption'] >= (df_an['predicted_values'] - (2*df_an['predicted_values'].std())):
return "Above Normal"
elif df_an['powerconsumption'] <= (df_an['predicted_values'] + (3*df_an['predicted_values'].std())):
return "Normal"
elif df_an['powerconsumption'] >= (df_an['predicted_values'] - (3*df_an['predicted_values'].std())):
return "Normal"
else:
return "Anomalous"
for col in df_an.columns:
df_an['status'] = df_an['powerconsumption'].apply(lambda x: status(x))
I have also tried this code:
filter_method = lambda x:
'Normal' if (df_an['powerconsumption'] =< (df_an['predicted_values'].mean() + (1*df_an['predicted_values'].std())))
else 'Normal' if df_an['powerconsumption'] => df_an['predicted_values'].mean() - (1*df_an['predicted_values'].std())
else 'Above Normal' if df_an['powerconsumption'] =< df_an['predicted_values'].mean() + (2*df_an['predicted_values'].std())
else 'Above Normal' if df_an['powerconsumption'] => df_an['predicted_values'].mean() - (2*df_an['predicted_values'].std())
else 'Nearing Anomalous' if df_an['powerconsumption'] =< df_an['predicted_values'].mean() + (3*df_an['predicted_values'].std())
else 'Nearing Anomalous' if df_an['powerconsumption'] => df_an['predicted_values'].mean() - (3*df_an['predicted_values'].std())
else 'Anomalous'df_an['powerconsumption'] > df_an['predicted_values'].mean() + (3*df_an['predicted_values'].std())
else 'Anomalous'df_an['powerconsumption'] < df_an['predicted_values'].mean() - (3*df_an['predicted_values'].std())
I always get a syntax error.
For the first one, this is the error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\Public\Documents\Wondershare\CreatorTemp/ipykernel_4112/2640136280.py in <module>
16
17 for col in df_an.columns:
---> 18 df_an['status'] = df_an['powerconsumption'].apply(lambda x: status(x))
c:\users\lenovo\anaconda3\envs\project\lib\site-packages\pandas\core\series.py in apply(self, func, convert_dtype, args, **kwargs)
4355 dtype: float64
4356 """
-> 4357 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
4358
4359 def _reduce(
c:\users\lenovo\anaconda3\envs\project\lib\site-packages\pandas\core\apply.py in apply(self)
1041 return self.apply_str()
1042
-> 1043 return self.apply_standard()
1044
1045 def agg(self):
c:\users\lenovo\anaconda3\envs\project\lib\site-packages\pandas\core\apply.py in apply_standard(self)
1100 values,
1101 f, # type: ignore[arg-type]
-> 1102 convert=self.convert_dtype,
1103 )
1104
c:\users\lenovo\anaconda3\envs\project\lib\site-packages\pandas\_libs\lib.pyx in pandas._libs.lib.map_infer()
C:\Users\Public\Documents\Wondershare\CreatorTemp/ipykernel_4112/2640136280.py in <lambda>(x)
16
17 for col in df_an.columns:
---> 18 df_an['status'] = df_an['powerconsumption'].apply(lambda x: status(x))
C:\Users\Public\Documents\Wondershare\CreatorTemp/ipykernel_4112/2640136280.py in status(x)
1 def status(x):
----> 2 if df_an['powerconsumption'] <= (df_an['predicted_values'] + (1*df_an['predicted_values'].std())):
3 return "Normal"
4 elif df_an['powerconsumption'] >= (df_an['predicted_values'] - (1*df_an['predicted_values'].std())):
5 return "Normal"
c:\users\lenovo\anaconda3\envs\project\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1536 def __nonzero__(self):
1537 raise ValueError(
-> 1538 f"The truth value of a {type(self).__name__} is ambiguous. "
1539 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1540 )
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
For the second one, this is the error message:
File "C:\Users\Public\Documents\Wondershare\CreatorTemp/ipykernel_4112/3716694249.py", line 1
filter_method = lambda x: 'Normal' if (df_an['powerconsumption'] > (df_an['predicted_values'].mean() + (1*df_an['predicted_values'].std())))
^
SyntaxError: invalid syntax