I have lots of xml files that I need to merge. I have tried this link at merging xml files using python's ElementTree whose code is (Edited as per my need):
import os, os.path, sys
import glob
from xml.etree import ElementTree
def run(files):
xml_files = glob.glob(files +"/*.xml")
xml_element_tree = None
for xml_file in xml_files:
print xml_file
data = ElementTree.parse(xml_file).getroot()
# print ElementTree.tostring(data)
for result in data.iter('TALLYMESSAGE'):
if xml_element_tree is None:
xml_element_tree = data
insertion_point = xml_element_tree.findall("./BODY/DATA/TALLYMESSAGE")[0]
else:
insertion_point.extend(result)
if xml_element_tree is not None:
f = open("myxmlfile.xml", "wb")
f.write(ElementTree.tostring(xml_element_tree))
run("F:/data/data")
But the problem is that I have lots of XML file, 365 to be precise and each one is atleast 2 mb. merging them all has lead to crashing of my PC.
This is the image of the xml tree of my xml file:
My new updated code is:
import os, os.path, sys
import glob
from lxml import etree
def XSLFILE(files):
xml_files = glob.glob(files +"/*.xml")
#print xml_files[0]
xslstring = """<?xml version="1.0" ?>
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:template match="/DATA">
<DATA>
<xsl:copy>
<xsl:copy-of select="TALLYMESSAGE"/>\n"""
#print xslstring
for xmlfile in xml_files[1:]:
xslstring = xslstring + '<xsl:copy-of select="document(\'' + xmlfile[-16:] + "')/BODY/DATA/TALLYMESSAGE\"/>\n"
xslstring = xslstring + """</xsl:copy>+
</DATA>
</xsl:template>
</xsl:transform>"""
#print xslstring
with open("parsingxsl.xsl", "w") as f:
f.write(xslstring)
with open(xml_files[0], "r") as f:
dom = etree.XML(f.read())
print etree.tostring(dom)
with open('F:\data\parsingxsl.xsl', "r") as f:
xslt_tree = etree.XML(f.read())
print xslt_tree
transform = etree.XSLT(xslt_tree)
newdom = transform(dom)
#print newdom
tree_out = etree.tostring(newdom, encoding='UTF-8', pretty_print=True, xml_declaration=True)
print(tree_out)
xmlfile = open('F:\data\OutputFile.xml','wb')
xmlfile.write(tree_out)
xmlfile.close()
XSLFILE("F:\data\data")
The same when run creates the following error:
Traceback (most recent call last):
File "F:\data\xmlmergexsl.py", line 38, in <module>
XSLFILE("F:\data\data")
File "F:\data\xmlmergexsl.py", line 36, in XSLFILE
xmlfile.write(tree_out)
TypeError: must be string or buffer, not None