I have a dataframe where I have converted the dates into months, changed the numbers to strings i.e. Apr, May. I then grouped by months to have a smaller dataset. However, the months are in the wrong order and I want to get them to go from April to March.
I wanted to sort by column, but it seems that the month is no longer a column after I grouped it, so I presume it is an index.
This is my code:
aDF = pd.DataFrame(e)
rgDF = aDF[['GrantRefNumber','Call','FirstReceivedDate','TotalGrantValue']]
rgaDF = rgDF.drop('GrantRefNumber',1)
rgbDF = rgaDF.drop('Call',1)
rgbDF['month'] = pd.DatetimeIndex(rgbDF['FirstReceivedDate']).month
rgbDF['month'] = rgbDF['month'].apply(lambda x: calendar.month_abbr[x])
gA = rgbDF.groupby('month').agg({'TotalGrantValue':sum, 'FirstReceivedDate':'count'}).rename(columns={'FirstReceivedDate':'Count'})
gA['TotalGrantValue'] = gA['TotalGrantValue'].map('{:,.2f}'.format)
Which produces this:
Count TotalGrantValue
month
Apr 29 14,039,166.51
Aug 32 15,340,273.93
Dec 28 14,801,964.91
Feb 28 15,946,952.06
Jan 33 17,820,324.72
Jul 31 16,870,364.57
Jun 37 18,472,945.88
Mar 39 22,387,224.85
May 24 11,517,789.48
Nov 31 16,761,506.64
Oct 28 12,965,535.58
Sep 34 16,752,631.34
I am trying this, but it isnt working (throwing errors):
gA.sort_index(by=['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar'])
gA
FULL ERROR:
KeyErrorTraceback (most recent call last)
<ipython-input-43-66c4870a7499> in <module>()
9
10 vals = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']
---> 11 gA = gA.set_index('month').reindex(a).reset_index()
C:\Anaconda\lib\site-packages\pandas\core\frame.pyc in set_index(self, keys, drop, append, inplace, verify_integrity)
2926 names.append(None)
2927 else:
-> 2928 level = frame[col]._values
2929 names.append(col)
2930 if drop:
C:\Anaconda\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
2060 return self._getitem_multilevel(key)
2061 else:
-> 2062 return self._getitem_column(key)
2063
2064 def _getitem_column(self, key):
C:\Anaconda\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
2067 # get column
2068 if self.columns.is_unique:
-> 2069 return self._get_item_cache(key)
2070
2071 # duplicate columns & possible reduce dimensionality
C:\Anaconda\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
1532 res = cache.get(item)
1533 if res is None:
-> 1534 values = self._data.get(item)
1535 res = self._box_item_values(item, values)
1536 cache[item] = res
C:\Anaconda\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.pyc in get_loc(self, key, method, tolerance)
2393 return self._engine.get_loc(key)
2394 except KeyError:
-> 2395 return self._engine.get_loc(self._maybe_cast_indexer(key))
2396
2397 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()
KeyError: 'month'