8

I am trying to write a series of pandas DataFrames to an Excel worksheet such that:

  1. The existing contents of the worksheet are not overwritten or erased, and
  2. the Excel column widths are adjusted to fit the lengths of the column entries (so that I don't have to manually do this in Excel).

For 1), I have found an excellent solution in the form of a helper function written by @MaxU: How to write to an existing excel file without overwriting data (using pandas)?. For 2) I found what looked like a good solution here. But when I try to put these solutions together, the column widths don't change at all. Here's my full code:

import pandas as pd
import os
from openpyxl import load_workbook

def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) calculate the last row
                     in the existing DF and write to the next row...
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2', 
                           index=False, startrow=25)

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    # Excel file doesn't exist - saving and exiting
    if not os.path.isfile(filename):
        df.to_excel(
            filename,
            sheet_name=sheet_name, 
            startrow=startrow if startrow is not None else 0, 
            **to_excel_kwargs)
        return
    
    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')

    # try to open an existing workbook
    writer.book = load_workbook(filename)
    
    # get the last row in the existing Excel sheet
    # if it was not specified explicitly
    if startrow is None and sheet_name in writer.book.sheetnames:
        startrow = writer.book[sheet_name].max_row

    # truncate sheet
    if truncate_sheet and sheet_name in writer.book.sheetnames:
        # index of [sheet_name] sheet
        idx = writer.book.sheetnames.index(sheet_name)
        # remove [sheet_name]
        writer.book.remove(writer.book.worksheets[idx])
        # create an empty sheet [sheet_name] using old index
        writer.book.create_sheet(sheet_name, idx)
    
    # copy existing sheets
    writer.sheets = {ws.title:ws for ws in writer.book.worksheets}

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

   
  """
   Now attempt to adjust the column widths as necessary so that all the cell contents are visible 
   in Excel. The code below is taken from https://towardsdatascience.com/how-to-auto-adjust-the-width-of-excel-columns-with-pandas-excelwriter-60cee36e175e.
 """
    for column in df:
      column_width = max(df[column].astype(str).map(len).max(), len(column))
      col_idx = df.columns.get_loc(column)
      writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)

    writer.save()

Now I tried testing the function:

df = pd.DataFrame({'A_Very_Long_Column_Name': [10, 20, 30, 20, 15, 30, 45]})
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df, "Sheet1")

A new Excel workbook named test.xlsx is created along with a sheet named Sheet1, and the contents of df are written to Sheet1, but the column widths are completely unaffected: enter image description here

And strangely, when I try to execute the function a second time (without changing the arguments), I get an error:

runcell(2, 'C:/Users/Leonidas/Documents/write_to_excel2.py')
Traceback (most recent call last):

  File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 125, in <module>
    append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df,

  File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 100, in append_df_to_excel
    writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)

AttributeError: 'Worksheet' object has no attribute 'set_column'

I'm pretty confused at this point...Any suggestions for how to fix the code would be greatly appreciated.

Leonidas
  • 613
  • 1
  • 6
  • 23
  • According to the article you mentioned, do you install the module xlsxwriter by `pip install xlsxwriter` – SCKU Jun 13 '21 at 05:24
  • or if you use Windows and have Office in it, you can try `win32com` with `AutoFit`, see https://stackoverflow.com/a/33665967/3214422 for more detailed. – SCKU Jun 13 '21 at 05:29

3 Answers3

6

Try to use this helper function (updated version):


Old version, which is no longer compatible with Pandas 1.3.0+:

import numpy as np
import pandas as pd
from pathlib import Path
from typing import Union, Optional, List, Tuple
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter


def append_df_to_excel(
        filename: Union[str, Path],
        df: pd.DataFrame,
        sheet_name: str = 'Sheet1',
        startrow: int = None,
        max_col_width: int = 40,
        autofilter: bool = False,
        fmt_int: str = "#,##0",
        fmt_float: str = "#,##0.00",
        fmt_date: str = "yyyy-mm-dd",
        fmt_datetime: str = "yyyy-mm-dd hh:mm",
        truncate_sheet: bool = False,
        **to_excel_kwargs
) -> None:
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) calculate the last row
                     in the existing DF and write to the next row...
    @param max_col_width: maximum column width in Excel. Default: 30
    @param autofilter: boolean - whether add Excel autofilter or not. Default: True
    @param fmt_int: Excel format for integer numbers
    @param fmt_float: Excel format for float numbers
    @param fmt_date: Excel format for dates
    @param fmt_datetime: Excel format for datetime's
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df, autofilter=True,
                           freeze_panes=(1,0))

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False, startrow=25)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, index=False,
                           fmt_datetime="dd.mm.yyyy hh:mm")

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    def set_column_format(ws, column_letter, fmt):
        for cell in ws[column_letter]:
            cell.number_format = fmt
    filename = Path(filename)
    file_exists = filename.is_file()
    # process parameters
    first_col = int(to_excel_kwargs.get("index", True)) + 1
    sheet_name = to_excel_kwargs.get("sheet_name", "Sheet1")
    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    with pd.ExcelWriter(
        filename.with_suffix(".xlsx"),
        engine="openpyxl",
        mode="a" if file_exists else "w",
        date_format=fmt_date,
        datetime_format=fmt_datetime,
        **to_excel_kwargs
    ) as writer:
        if file_exists:
            # try to open an existing workbook
            writer.book = load_workbook(filename)
            # get the last row in the existing Excel sheet
            # if it was not specified explicitly
            if startrow is None and sheet_name in writer.book.sheetnames:
                startrow = writer.book[sheet_name].max_row
            # truncate sheet
            if truncate_sheet and sheet_name in writer.book.sheetnames:
                # index of [sheet_name] sheet
                idx = writer.book.sheetnames.index(sheet_name)
                # remove [sheet_name]
                writer.book.remove(writer.book.worksheets[idx])
                # create an empty sheet [sheet_name] using old index
                writer.book.create_sheet(sheet_name, idx)

            # copy existing sheets
            writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
        else:
            # file doesn't exist, we are creating a new one
            startrow = 0

        # write out the DataFrame to an ExcelWriter
        df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
                    **to_excel_kwargs)

        # automatically set columns' width
        worksheet = writer.sheets[sheet_name]
        for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
            col_no = xl_col_no - first_col
            width = max(df.iloc[:, col_no].astype(str).str.len().max(),
                        len(df.columns[col_no]) + 6)
            width = min(max_col_width, width)
            # print(f"column: [{df.columns[col_no]} ({dtyp.name})]\twidth:\t[{width}]")
            column_letter = get_column_letter(xl_col_no)
            worksheet.column_dimensions[column_letter].width = width
            if np.issubdtype(dtyp, np.integer):
                set_column_format(worksheet, column_letter, fmt_int)
            if np.issubdtype(dtyp, np.floating):
                set_column_format(worksheet, column_letter, fmt_float)
        if autofilter:
            worksheet.auto_filter.ref = worksheet.dimensions
MaxU - stand with Ukraine
  • 205,989
  • 36
  • 386
  • 419
  • Thanks for your help! This almost does what I want: When I try write to dataframes side by side, the column width adjustment seems to work perfectly for first df, but doesn't seem to affect the second one. I'll try tweaking the code and see if I can fix it... – Leonidas Jun 10 '21 at 19:00
2

You can also try using the openpyxl bestFit attribute, which sets the column width to the same width that double clicking on the border of the column does. It should do the trick. Try doing something like this:

for column in df:
    ws.column_dimensions[column].bestFit = True

Depending on why you're exporting to Excel, you could also look into a number of different Python based spreadsheets. I'm the author of one called Mito. It lets you display your pandas dataframe as an interactive spreadsheet.

0

This code works when using 'openpyxl' as your engine. Edit any part as you wish.

def text_length(text):
    """
    Get the effective text length in characters, taking into account newlines
    """
    if not text:
        return 0
    lines = text.split("\n")
    return max(len(line) for line in lines)

def _to_str_for_length(v, decimals=3):
    """
    Like str() but rounds decimals to predefined length
    """
    if isinstance(v, float):
        # Round to [decimal] places
        return str(Decimal(v).quantize(Decimal('1.' + '0' * decimals)).normalize())
    else:
        return str(v)


def auto_adjust_xlsx_column_width(df, writer, sheet_name, margin=3, length_factor=1.0, decimals=3, index=False):

    sheet = writer.sheets[sheet_name]
    _to_str = functools.partial(_to_str_for_length, decimals=decimals)
    # Compute & set column width for each column
    for column_name in df.columns:
        # Convert the value of the columns to string and select the 
        column_length =  max(df[column_name].apply(_to_str).map(text_length).max(), text_length(column_name)) + 5
        # Get index of column in XLSX
        # Column index is +1 if we also export the index column
        col_idx = df.columns.get_loc(column_name)
        if index:
            col_idx += 1
        # Set width of column to (column_length + margin)
        sheet.column_dimensions[openpyxl.utils.cell.get_column_letter(col_idx + 1)].width = column_length * length_factor + margin
    # Compute column width of index column (if enabled)
    if index: # If the index column is being exported
        index_length =  max(df.index.map(_to_str).map(text_length).max(), text_length(df.index.name))
        sheet.column_dimensions["A"].width = index_length * length_factor + margin

Example of using it

with pd.ExcelWriter('report.xlsx', sheet_name="results", engine='openpyxl') as writer:
    report_df.to_excel(writer,index=False, sheet_name="results")
    auto_adjust_xlsx_column_width(report_df, writer, "results")
Imperial_J
  • 306
  • 1
  • 7
  • 23