I'm getting a keyerror when trying to set the index of my dataframe. I've not encountered this before when setting the index in the same way, and am wondering what's going wrong? The data has no column headers, therefore the DataFrame headers are 0,1,2,4,5 etc. The error occurs on any column header.
I receive KeyError: '0' when trying to use the first column (which I want to use as the only index).
For context: In the sample below, I'm selecting macro enabled excel spreadsheets, squeezing the data, reading and converting them into DataFrames.
I then want to include the filename in a column, set the index and strip whitespace so that I can use index labels to extract the data I need. Not every worksheet will have the index labels so I have the try and except to skip the worksheets which don't contain those labels in the index. I then want to concatenate each result into one DataFrame and squeeze unused columns.
import itertools
import glob
from openpyxl import load_workbook
from pandas import DataFrame
import pandas as pd
import os
def get_data(ws):
for row in ws.values:
row_it = iter(row)
for cell in row_it:
if cell is not None:
yield itertools.chain((cell,), row_it)
break
def read_workbook(file_):
wb = load_workbook(file_, data_only=True)
for sheet in wb.worksheets:
ws = sheet
return DataFrame(get_data(ws))
path =r'dir'
allFiles = glob.glob(path + "/*.xlsm")
frame = pd.DataFrame()
list_ = []
for file_ in allFiles:
parsed_file = read_workbook(file_)
parsed_file['filename'] = os.path.basename(file_)
parsed_file.set_index(['0'], inplace = True)
parsed_file.index.str.strip()
try:
parsed_file.loc["Staff" : "Total"].copy()
list_.append(parsed_file)
except KeyError:
pass
frame = pd.concat(list_)
print(frame.dropna(axis='columns', thresh=2, inplace = True))
example dataframe, index position needed and labels to be extracted.
index
0 1 2
0 5 2 4
1 RTJHD 5 9
2 ABCD 4 6
3 Staff 9 3 --- extract from here
4 FHDHSK 3 2
5 IRRJWK 7 1
6 FJDDCN 1 8
7 67 4 7
8 Total 5 3 --- to here
Error
Traceback (most recent call last):
File "<ipython-input-29-d8fd24ca84ec>", line 1, in <module>
runfile('dir.py', wdir='C:/dir/Documents')
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "dir.py", line 36, in <module>
parsed_file.set_index(['0'], inplace = True)
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\frame.py", line 2830, in set_index
level = frame[col]._values
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\frame.py", line 1964, in __getitem__
return self._getitem_column(key)
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\frame.py", line 1971, in _getitem_column
return self._get_item_cache(key)
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\generic.py", line 1645, in _get_item_cache
values = self._data.get(item)
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\internals.py", line 3590, in get
loc = self.items.get_loc(item)
File "C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\indexes\base.py", line 2444, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5280)
File "pandas\_libs\index.pyx", line 154, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5126)
File "pandas\_libs\hashtable_class_helper.pxi", line 1210, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20523)
File "pandas\_libs\hashtable_class_helper.pxi", line 1218, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20477)
KeyError: '0'