IIUC you need convert column duration
to_timedelta
with unit=h
(hours
) and then pivot_table
(I remove []
in parameter value
for remove Multiindex
in columns):
import pandas as pd
dict_data = [{'duration': 0.7, 'project_id': 3, 'resource': u'Arya Stark', 'activity': u'Development'},
{'duration': 0.9, 'project_id': 4, 'resource': u'Ned Stark', 'activity': u'Development'},
{'duration': 2.88, 'project_id': 7, 'resource': u'Robb Stark', 'activity': u'Development'},
{'duration': 0.22, 'project_id': 9, 'resource': u'Jon Snow', 'activity': u'Support'},
{'duration': 0.3, 'project_id': 9, 'resource': u'Jon Snow', 'activity': u'Support'},
{'duration': 2.15, 'project_id': 3, 'resource': u'Arya Stark', 'activity': u'Practise'},
{'duration': 3.35, 'project_id': 4, 'resource': u'Sansa Stark', 'activity': u'Development'},
{'duration': 2.17, 'project_id': 9, 'resource': u'Rickon Stark', 'activity': u'Development'},
{'duration': 1.03, 'project_id': 4, 'resource': u'Benjan Stark', 'activity': u'Design'},
{'duration': 1.77, 'project_id': 4, 'resource': u'Bran Stark', 'activity': u'Testing'},
{'duration': 1.17, 'project_id': 4, 'resource': u'Ned Stark', 'activity': u'Development'},
{'duration': 0.17, 'project_id': 9, 'resource': u'Jon Snow', 'activity': u'Support'},
{'duration': 1.77, 'project_id': 3, 'resource': u'catelyn stark', 'activity': u'Development'},
{'duration': 0.3, 'project_id': 9, 'resource': u'Jon Snow', 'activity': u'Support'},
{'duration': 0.45, 'project_id': 9, 'resource': u'Jon Snow', 'activity': u'Support'}]
df = pd.DataFrame(dict_data)
df['duration1'] = pd.to_timedelta(df['duration'], unit='h')
print (df)
activity duration project_id resource duration1
0 Development 0.70 3 Arya Stark 00:42:00
1 Development 0.90 4 Ned Stark 00:54:00
2 Development 2.88 7 Robb Stark 02:52:48
3 Support 0.22 9 Jon Snow 00:13:12
4 Support 0.30 9 Jon Snow 00:18:00
5 Practise 2.15 3 Arya Stark 02:09:00
6 Development 3.35 4 Sansa Stark 03:21:00
7 Development 2.17 9 Rickon Stark 02:10:12
8 Design 1.03 4 Benjan Stark 01:01:48
9 Testing 1.77 4 Bran Stark 01:46:12
10 Development 1.17 4 Ned Stark 01:10:12
11 Support 0.17 9 Jon Snow 00:10:12
12 Development 1.77 3 catelyn stark 01:46:12
13 Support 0.30 9 Jon Snow 00:18:00
14 Support 0.45 9 Jon Snow 00:27:00
pvt1 = pd.pivot_table(df,
values='duration1',
index=['project_id','resource'],
columns=['activity'],
aggfunc=np.sum,
fill_value=0)
print (pvt1)
activity Design Development Practise Support Testing \
project_id resource
3 Arya Stark 00:00:00 00:42:00 02:09:00 00:00:00 00:00:00
catelyn stark 00:00:00 01:46:12 00:00:00 00:00:00 00:00:00
4 Benjan Stark 01:01:48 00:00:00 00:00:00 00:00:00 00:00:00
Bran Stark 00:00:00 00:00:00 00:00:00 00:00:00 01:46:12
Ned Stark 00:00:00 02:04:12 00:00:00 00:00:00 00:00:00
Sansa Stark 00:00:00 03:21:00 00:00:00 00:00:00 00:00:00
7 Robb Stark 00:00:00 02:52:48 00:00:00 00:00:00 00:00:00
9 Jon Snow 00:00:00 00:00:00 00:00:00 01:26:24 00:00:00
Rickon Stark 00:00:00 02:10:12 00:00:00 00:00:00 00:00:00
All 01:01:48 12:56:24 02:09:00 01:26:24 01:46:12
activity All
project_id resource
3 Arya Stark 02:51:00
catelyn stark 01:46:12
4 Benjan Stark 01:01:48
Bran Stark 01:46:12
Ned Stark 02:04:12
Sansa Stark 03:21:00
7 Robb Stark 02:52:48
9 Jon Snow 01:26:24
Rickon Stark 02:10:12
All 19:19:48