I have a various amount of input .xlsx documents that contain 12 sheets (all sheets have the same name within each .xlsx document). I need to combine these into one .xlsx document while retaining the original sheets' names, but the data from all documents for each sheets appended to the original sheets.
For example, see my original output:
Currently, I am not adding the inputFile name anywhere and just trying to merge into one workbook. However, I keep receiving an error:
def createEmptyWorkbook(self, outputFileName):
logging.info('creating empty workbook: %s' % (outputFileName))
# create empty workbook
ncoa_combined_report = openpyxl.Workbook()
# save new file
ncoa_combined_report.save(outputFileName)
ncoa_combined_report = openpyxl.load_workbook(filename=outputFileName)#, data_only=True)
return ncoa_combined_report
def combine_sheets(self, inputFiles):
logging.info('combining ncoa reports to one workbook')
# new output files
outputFile = os.path.join(self.processingDir, 'combined_ncoa_report.xlsx')
# create empty workbook
ncoa_combined_report = self.createEmptyWorkbook(outputFile)
# get a list of sheet names created in output file
outputSheetNames = ncoa_combined_report.sheetnames
for inputFile in inputFiles:
logging.info('reading ncoa report: %s' % (os.path.split(inputFile)[-1]))
# load entire input file into memory
input_wb = openpyxl.load_workbook(filename = inputFile)#, data_only=True)
# get sheet name values in inputFile
sheets = input_wb.sheetnames
# iterate worksheets in input file
for worksheet in input_wb.worksheets:
outputSheetMaxRow = 0
currentSheet = ''
row = ''
column = ''
logging.info('working on sheet: %s' % (worksheet.title))
# check if sheet exist in output file and add if neccissary
if not worksheet.title in outputSheetNames:
logging.info('creating sheet: %s' % (worksheet.title))
currentSheet = ncoa_combined_report.create_sheet(worksheet.title)
else:
currentSheet = worksheet.title
## check if default sheet name is in output
#if 'Sheet' in outputSheetNames:
# ncoa_combined_report.remove_sheet(ncoa_combined_report.get_sheet_by_name('Sheet'))
outputSheetMaxRow = currentSheet.max_row
for row, entry in enumerate(worksheet, start=1):
logging.info('working on row: %s' % (row))
for cell in entry:
try:
outputSheetMaxRow = currentSheet.max_row
# add cell value to output file
#currentSheet[cell.coordinate].value
currentSheet.cell(row=row+outputSheetMaxRow, column=cell.column).value = cell.value #, value=cell
except:
logging.critical('could not add row:%s, cell:%s' % (row, entry))
raise ValueError('could not add row:%s, cell:%s' % (row, entry))
# save new file
ncoa_combined_report.save(outputFile)
I am not sure why I am getting the error or what I need to update to correct it. Any guidance is appreciated.