I'm trying to convert a directory full of XLSX files to CSV. Everything is working except I'm encountering an issue with the columns containing time information. The XLSX file is being created by another program that I can not modify. But I want to maintain the same times that show up when I view the XLSX file in Excel as when it is converted to CSV and viewed in any text editor.
My code:
import csv
import xlrd
import os
import fnmatch
import Tkinter, tkFileDialog, tkMessageBox
def main():
root = Tkinter.Tk()
root.withdraw()
print 'Starting .xslx to .csv conversion'
directory = tkFileDialog.askdirectory()
for fileName in os.listdir(directory):
if fnmatch.fnmatch(fileName, '*.xlsx'):
filePath = os.path.join(directory, fileName)
saveFile = os.path.splitext(filePath)[0]+".csv"
savePath = os.path.join(directory, saveFile)
workbook = xlrd.open_workbook(filePath)
sheet = workbook.sheet_by_index(0)
csvOutput = open(savePath, 'wb')
csvWriter = csv.writer(csvOutput, quoting=csv.QUOTE_ALL)
for row in xrange(sheet.nrows):
csvWriter.writerow(sheet.row_values(row))
csvOutput.close()
print '.csv conversion complete'
main()
To add some detail, if I open one file in Excel I see this in a time column:
00:10.3
00:14.2
00:16.1
00:20.0
00:22.0
But after I convert to CSV I see this in the same location:
0.000118981
0.000164005
0.000186227
0.000231597
0.000254861
Thanks to seanmhanson with his answer https://stackoverflow.com/a/25149562/1858351 I was able to figure out that Excel is dumping the times as decimals of a day. While I should try to learn and use xlrd better, for a quick short term fix I was instead able to convert that into seconds and then from seconds back into the time format originally seen of HH:MM:SS. My (probably ugly) code below in case anyone might be able to use it:
import csv
import xlrd
import os
import fnmatch
from decimal import Decimal
import Tkinter, tkFileDialog
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
def seconds_to_hms(seconds):
input = Decimal(seconds)
m, s = divmod(input, 60)
h, m = divmod(m, 60)
hm = "%02d:%02d:%02.2f" % (h, m, s)
return hm
def main():
root = Tkinter.Tk()
root.withdraw()
print 'Starting .xslx to .csv conversion'
directory = tkFileDialog.askdirectory()
for fileName in os.listdir(directory):
if fnmatch.fnmatch(fileName, '*.xlsx'):
filePath = os.path.join(directory, fileName)
saveFile = os.path.splitext(filePath)[0]+".csv"
savePath = os.path.join(directory, saveFile)
workbook = xlrd.open_workbook(filePath)
sheet = workbook.sheet_by_index(0)
csvOutput = open(savePath, 'wb')
csvWriter = csv.writer(csvOutput, quoting=csv.QUOTE_ALL)
rowData = []
for rownum in range(sheet.nrows):
rows = sheet.row_values(rownum)
for cell in rows:
if is_number(cell):
seconds = float(cell)*float(86400)
hms = seconds_to_hms(seconds)
rowData.append((hms))
else:
rowData.append((cell))
csvWriter.writerow(rowData)
rowData = []
csvOutput.close()
print '.csv conversion complete'
main()