-2
def find_excel_files_in(directory:pathlib.Path) -> List[pathlib.Path]:
    files:List[pathlib.Path] = list()

    for filepath in directory.rglob('*.xls'): # Extension needed to find
        if filepath.is_file():
            files.append(filepath)

    return files

# List of your directories
directories:List[str] = ['C:/Users/lwsee/Documents/08. Data/'] # Directory, including subfolder
                         
found_files:List[pathlib.Path] = list()

for directory in directories:
    directory:pathlib.Path = pathlib.Path(directory)
    found_files.extend(find_excel_files_in(directory))

for filepath in found_files:
    print(filepath)
    df = pd.read_excel(filepath)
    df.to_excel(filepath+'xlsx', index=False)

Tried using this and my error came out

TypeError                                 Traceback (most recent call last)
<ipython-input-27-42e2e29b682b> in <module>
     20     print(filepath)
     21     df = pd.read_excel(filepath)
---> 22     df.to_excel(filepath+'xlsx', index=False)
     23 

TypeError: unsupported operand type(s) for +: 'WindowsPath' and 'str'
  1. I'm trying to convert all my .XLS to .XLSX
  2. Then I want to append/concat all my xlsx file. As I'm stuck with step 1, I'm unable to proceed further.
Seen Biz
  • 1
  • 2
  • If you're using `pathlib` why not do `filepath.withsuffix('xlsx')`? The error suggests that the `WindowsPath` object can't be added to a sting. If you want to do it like that you just need `str(filepath)+'xlsx'` – Tomerikoo Aug 26 '21 at 08:31
  • Does this answer your question? [how to convert xls to xlsx](https://stackoverflow.com/q/9918646/6045800) – Tomerikoo Aug 26 '21 at 08:33

1 Answers1

0
excel = win32.gencache.EnsureDispatch('Excel.Application')

for dirpath, dirnames, filenames in os.walk(r'C:\Users\lwsee\Documents\08. Data'):
    for filename in filenames:
        name, ext = os.path.splitext(filename)
        if ext == '.xls':
            wb = excel.Workbooks.Open(os.path.join(dirpath, filename))
            wb.DoNotPromptForConvert = True
            wb.CheckCompatibility = False
            excel.DisplayAlerts = False
            wb.SaveAs(os.path.join(dirpath, name + '.xlsx'), FileFormat=51, ConflictResolution=2) 
            # FileFormat=51 = .xlsx extension, FileFormat=56 = .xls extension
           

excel.Application.Quit()
print('END:')

I had this code but used wrong FileFormat.

Then, used this to concat all my .xlsx files

from pathlib import Path

# path to files
dir1 = r'C:\Users\lwsee\Documents\08. Data'
p = Path(dir1)

# find the xlsx files
files = p.rglob('*.xlsx') #use Path.glob() for files in directory, Path.rglob() to include subdirectories

# create the dataframe 
df = pd.concat([pd.read_excel(file) for file in files])

# save the file
df.to_csv(dir1+'\\test.csv', index=False)
print('END1:')
Seen Biz
  • 1
  • 2