excel = win32.gencache.EnsureDispatch('Excel.Application')
for dirpath, dirnames, filenames in os.walk(r'C:\Users\lwsee\Documents\08. Data'):
for filename in filenames:
name, ext = os.path.splitext(filename)
if ext == '.xls':
wb = excel.Workbooks.Open(os.path.join(dirpath, filename))
wb.DoNotPromptForConvert = True
wb.CheckCompatibility = False
excel.DisplayAlerts = False
wb.SaveAs(os.path.join(dirpath, name + '.xlsx'), FileFormat=51, ConflictResolution=2)
# FileFormat=51 = .xlsx extension, FileFormat=56 = .xls extension
excel.Application.Quit()
print('END:')
I had this code but used wrong FileFormat.
Then, used this to concat all my .xlsx files
from pathlib import Path
# path to files
dir1 = r'C:\Users\lwsee\Documents\08. Data'
p = Path(dir1)
# find the xlsx files
files = p.rglob('*.xlsx') #use Path.glob() for files in directory, Path.rglob() to include subdirectories
# create the dataframe
df = pd.concat([pd.read_excel(file) for file in files])
# save the file
df.to_csv(dir1+'\\test.csv', index=False)
print('END1:')