1

I've got the following code to merge excel files. But it keeps losing the formatting. Is there any way to merge multiple excel files whilst retaining the cell formatting of each?

import numpy as np
import pandas as pd
import openpyxl
import os
import glob

all_names = []

for x in os.listdir(r'C:\<directory to excel files>'):
    if x.endswith(".xlsx"):
        all_names.append(x[:-5])
        print(all_names)

all_data = pd.DataFrame()
for f in glob.glob("*.xlsx"):
    df = pd.read_excel(f)
    all_data = all_data.append(df,ignore_index=True)

appended_df = pd.concat((all_data), keys=(all_names))
appended_df.to_excel(r"C:\<directory for merged file>")
ChaddRobertson
  • 605
  • 3
  • 11
  • 30
keepanda
  • 11
  • 2

1 Answers1

0

One way to preserve formatting it to append DF to the existing Excel file, kind of like this.

def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False,
                       **to_excel_kwargs):

    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    writer = pd.ExcelWriter(filename, engine='openpyxl')

    try:
        # try to open an existing workbook
        writer.book = load_workbook(filename)

        # get the last row in the existing Excel sheet
        # if it was not specified explicitly
        if startrow is None and sheet_name in writer.book.sheetnames:
            startrow = writer.book[sheet_name].max_row

        # truncate sheet
        if truncate_sheet and sheet_name in writer.book.sheetnames:
            # index of [sheet_name] sheet
            idx = writer.book.sheetnames.index(sheet_name)
            # remove [sheet_name]
            writer.book.remove(writer.book.worksheets[idx])
            # create an empty sheet [sheet_name] using old index
            writer.book.create_sheet(sheet_name, idx)

        # copy existing sheets
        writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
    except FileNotFoundError:
        # file does not exist yet, we will create it
        pass

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

    # save the workbook
    writer.save()
Oleg O
  • 1,005
  • 6
  • 11
  • I keep getting error: writer = `pd.ExcelWriter(filename, engine='openpyxl')` it says _Abstract class 'ExcelWriter' with abstract methods instantiated_ – keepanda Mar 05 '20 at 10:06
  • Seems like a known bug of pylint. https://github.com/PyCQA/pylint/issues/3060 Try setting it inside a context manager with the pylint instruction, like they suggest: `with pd.ExcelWriter(filename, engine='openpyxl') as writer: #pylint: disable=abstract-class-instantiated` – Oleg O Mar 05 '20 at 10:27
  • 1
    okay there are no errors anymore but it doesnt show any formatting.. – keepanda Mar 05 '20 at 12:40