I have written below code to convert a CSV file to an XML file. Input file has 10 millions records. Problem is it runs for hours with 10 millions records. With less number of records like 2000 it takes 5-10 seconds.
Is there a way to do it efficiently in less time?
import csv
import sys
import os
from xml.dom.minidom import Document
filename = sys.argv[1]
filename = os.path.splitext(filename)[0]+'.xml'
pathname = "/tmp/"
output_file = pathname + filename
f = sys.stdin
reader = csv.reader(f)
fields = next(reader)
fields = [x.lower() for x in fields]
fieldsR = fields
doc = Document()
dataRoot = doc.createElement("rowset")
dataRoot.setAttribute('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
dataRoot.setAttribute('xsi:schemaLocation', "./schema.xsd")
doc.appendChild(dataRoot)
for line in reader:
dataElt = doc.createElement("row")
for i in range(len(fieldsR)):
dataElt.setAttribute(fieldsR[i], line[i])
dataRoot.appendChild(dataElt)
xmlFile = open(output_file,'w')
xmlFile.write(doc.toprettyxml(indent = '\t'))
xmlFile.close()
sys.stdout.write(output_file)