I've been wrestling with this for a number of days and researching without success. I am trying to write a function that
- loops through a directory
- opens a excel file that matches a string pattern
- opens the file and searches for a specific worksheet ('importer')
- copies the data into csv and continues to append to the csv until all files complete.
- I'd like the function to ignore files that do not include the 'importer' tab or simply access the next file in the FOR loop without doing the remainder ('CSV FILE CREATION').
- File creation should only happen where both the file name matches the pattern and the 'importer' worksheet exist. I feel like I am close but need just a little direction.
def append_all(input_directory):
for file in os.listdir(input_directory):
# Qualify if file exist
if bool(re.search(pattern, file)) == True:
# Join directory path name to file name
in_fpath = os.path.join(input_directory, file)
out_fpath = os.path.join(input_directory, 'history.csv')
wrkbk = xlrd.open_workbook(in_fpath)
if wrkbk.sheet_names() == 'importer':
wrksht = wrkbk.sheet_by_name('importer')
# Handling excel refresh date value to be used to populate csv file
refresh_date_float = wrksht.cell_value(1, 4)
refresh_date_value = xlrd.xldate_as_datetime(refresh_date_float, wrkbk.datemode).strftime(
'%Y/%m/%d %H:%M')
# else:
# continue
# CSV FILE CREATION
# Qualify if file exist. Default returns TRUE
if os.path.isfile(out_fpath) == False:
# os.mkdir(output_directory)
# file will be created if it does not exist
with open(out_fpath, 'w', newline='') as csvfile:
wr = csv.writer(csvfile)
# start row index 3 to skip unecessary data
for rownum in range(3, wrksht.nrows):
# wr.writerow(wrksht.row_values(rownum) + list(refresh_date_value))
wr.writerow(list(wrksht.row_values(rownum)) + [refresh_date_value])
# Start append data
else:
with open(out_fpath, 'a', newline='') as csvfile:
wr = csv.writer(csvfile)
# start row index 4 to skip header row
for rownum in range(4, wrksht.nrows):
# wr.writerow(wrksht.row_values(rownum) + list(refresh_date_value))
wr.writerow(list(wrksht.row_values(rownum)) + [refresh_date_value])
csvfile.close()
print('process complete')