I have a folder full of XLS files that I am attempting to read into a Pandas data frame.
My code:
daily_postings_folder = '//DAILY POSTINGS - MAY23/'
daily_postings_list = glob.glob(os.path.join(daily_postings_folder, "*.xls"))
daily_postings_df = pd.concat((pd.read_excel(g, engine='xlrd', header=None) for g in daily_postings_list), ignore_index=True)
daily_postings_df.head()
The error I'm getting is this:
Traceback (most recent call last):
File "z:\Trade Reconciliation\Testing\Python_Testing\daily_trade_rec.v3(DEV).py", line 20, in <module>
daily_postings_df = pd.concat((pd.read_excel(g, engine='xlrd', header=None) for g in daily_postings_list), ignore_index=True)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py", line 347, in concat
op = _Concatenator(
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py", line 401, in __init__
objs = list(objs)
File "z:\Trade Reconciliation\Testing\Python_Testing\daily_trade_rec.v3(DEV).py", line 20, in <genexpr>
daily_postings_df = pd.concat((pd.read_excel(g, engine='xlrd', header=None) for g in daily_postings_list), ignore_index=True)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 457, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 1419, in __init__
self._reader = self._engines[engine](self._io, storage_options=storage_options)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\io\excel\_xlrd.py", line 25, in __init__
super().__init__(filepath_or_buffer, storage_options=storage_options)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 518, in __init__
self.book = self.load_workbook(self.handles.handle)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\pandas\io\excel\_xlrd.py", line 38, in load_workbook
return open_workbook(file_contents=data)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\xlrd\__init__.py", line 172, in open_workbook
bk = open_workbook_xls(
File "C:\Users\tac6328\Anaconda3\lib\site-packages\xlrd\book.py", line 79, in open_workbook_xls
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
File "C:\Users\tac6328\Anaconda3\lib\site-packages\xlrd\book.py", line 1284, in getbof
bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
File "C:\Users\tac6328\Anaconda3\lib\site-packages\xlrd\book.py", line 1278, in bof_error
raise XLRDError('Unsupported format, or corrupt file: ' + msg)
xlrd.biffh.XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'\xef\xbb\xbf<?xml'
The files are all XLS files so not sure what the problem is here.