0

I am trying to aggregate a column that contains NumPy arrays. Unfortunately, I get the error message "Function does not reduce".

# Reproduction script for the error described above (assumes `pd` is pandas
# and `np` is numpy; the imports are not shown in this snippet).
# Build a 16-row frame with columns P, V, G, month and Values; every 'Values'
# cell holds its own NumPy array [1, 3, 4].
results = pd.DataFrame([['p1', 'v1', 1, 0 ,np.array([1,3, 4])], ['p1', 'v1', 2, 0 ,np.array([1,3, 4])],['p1', 'v1', 1, 1 ,np.array([1,3, 4])], ['p1', 'v1', 2, 1 ,np.array([1,3, 4])],['p1', 'v2', 1, 0 ,np.array([1,3, 4])], ['p1', 'v2', 2, 0 ,np.array([1,3, 4])],['p1', 'v2', 2, 1 ,np.array([1,3, 4])], ['p1', 'v2', 1, 1 ,np.array([1,3, 4])],['p1', 'v3', 1, 0 ,np.array([1,3, 4])], ['p1', 'v3', 2, 0 ,np.array([1,3, 4])],['p1', 'v3', 3, 0 ,np.array([1,3, 4])], ['p1', 'v3', 4, 0 ,np.array([1,3, 4])],['p1', 'v4', 1, 0 ,np.array([1,3, 4])], ['p1', 'v4', 2, 0 ,np.array([1,3, 4])],['p1', 'v4', 3, 0 ,np.array([1,3, 4])], ['p1', 'v4', 4, 0 ,np.array([1,3, 4])]],columns=['P', 'V', 'G', 'month', 'Values'])
# Keep only the rows whose V is "v1" or "v2".
resultsilter = results.query('V=="v1" or V=="v2"')
# Sum the 'Values' arrays within each (G, month) group.
# NOTE(review): presumably this is the call that raises the reported
# "Function does not reduce" error -- confirm against the full traceback and
# the pandas version in use.
resultsilter = resultsilter.groupby(['G','month']).agg({'Values': 'sum'})
print(resultsilter)

I would like to get results like this:

[[1, 0 ,np.array([2,6,8])],[2, 0 ,np.array([2,6,8])],[1, 1 ,np.array([2,6,8])],[2, 1 ,np.array([2,6,8])]]

Any ideas?

1 Answer

0

I read up on the query() method and found that plain boolean-mask indexing is an equivalent alternative. This is what I did:

import pandas as pd
import numpy as np

# (V, G, month) for each sample row, in the order the rows appear in the frame.
_row_keys = [
    ('v1', 1, 0), ('v1', 2, 0), ('v1', 1, 1), ('v1', 2, 1),
    ('v2', 1, 0), ('v2', 2, 0), ('v2', 2, 1), ('v2', 1, 1),
    ('v3', 1, 0), ('v3', 2, 0), ('v3', 3, 0), ('v3', 4, 0),
    ('v4', 1, 0), ('v4', 2, 0), ('v4', 3, 0), ('v4', 4, 0),
]

# Every row belongs to 'p1' and carries its own copy of the array [1, 3, 4].
results = pd.DataFrame(
    [['p1', v, g, month, np.array([1, 3, 4])] for v, g, month in _row_keys],
    columns=['P', 'V', 'G', 'month', 'Values'],
)

# Boolean mask selecting the same rows as query('V=="v1" or V=="v2"').
_keep = results["V"].isin(["v1", "v2"])

# Sum the arrays per (G, month) group, then turn the group keys back into
# ordinary columns so the result is a flat table.
resultsilter = (
    results[_keep]
    .groupby(['G', 'month'])
    .agg({'Values': 'sum'})
    .reset_index()
)
print(resultsilter.head())
Sten Healey
  • 106
  • 6