I have an xml file which I want to sort based on attribute values. The following is the xml file:
<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?>
<dataset>
<name>imglab dataset</name>
<comment>Created by imglab tool.</comment>
<images>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00003.jpg">
<box top="175" left="59" width="73" height="29">
<label>groundpainting_hotstar</label>
</box>
<box top="174" left="205" width="56" height="24">
<label>groundpainting_yesbank</label>
</box>
<box top="170" left="141" width="44" height="32">
<label>groundpainting_vodafone</label>
</box>
</image>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00001.jpg"/>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00002.jpg">
<box top="198" left="17" width="32" height="10">
<label>sightscreen_pepsi</label>
</box>
</image>
</images>
</dataset>
The desired output is this:
<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?>
<dataset>
<name>imglab dataset</name>
<comment>Created by imglab tool.</comment>
<images>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00001.jpg"/>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00002.jpg">
<box top="198" left="17" width="32" height="10">
<label>sightscreen_pepsi</label>
</box>
</image>
<image file="/home/iris/Documents/SONY_MAX-20150408-200026-210358-00003.jpg">
<box top="175" left="59" width="73" height="29">
<label>groundpainting_hotstar</label>
</box>
<box top="174" left="205" width="56" height="24">
<label>groundpainting_yesbank</label>
</box>
<box top="170" left="141" width="44" height="32">
<label>groundpainting_vodafone</label>
</box>
</image>
</images>
</dataset>
I tried the following two options:
import xml.etree.ElementTree as ET
tree = ET.parse("finalxml.xml")
container = tree.find("images")
data = []
for elem in container:
key = elem.findtext("image")
data.append((key,elem))
data.sort()
container[:] = [item[-1] for item in data]
tree.write("new-data.xml")
This code just realigns the box attributes and not the image file attribute, which is not desirable. The following is something I have taken from SO, but doesn't do anything.
# =======================================================================
# Monkey patch ElementTree
import xml.etree.ElementTree as ET
def _serialize_xml(write, elem, encoding, qnames, namespaces):
tag = elem.tag
text = elem.text
if tag is ET.Comment:
write("<!--%s-->" % ET._encode(text, encoding))
elif tag is ET.ProcessingInstruction:
write("<?%s?>" % ET._encode(text, encoding))
else:
tag = qnames[tag]
if tag is None:
if text:
write(ET._escape_cdata(text, encoding))
for e in elem:
_serialize_xml(write, e, encoding, qnames, None)
else:
write("<" + tag)
items = elem.items()
if items or namespaces:
if namespaces:
for v, k in sorted(namespaces.items(),
key=lambda x: x[1]): # sort on prefix
if k:
k = ":" + k
write(" xmlns%s=\"%s\"" % (
k.encode(encoding),
ET._escape_attrib(v, encoding)
))
#for k, v in sorted(items): # lexical order
for k, v in items: # Monkey patch
if isinstance(k, ET.QName):
k = k.text
if isinstance(v, ET.QName):
v = qnames[v.text]
else:
v = ET._escape_attrib(v, encoding)
write(" %s=\"%s\"" % (qnames[k], v))
if text or len(elem):
write(">")
if text:
write(ET._escape_cdata(text, encoding))
for e in elem:
_serialize_xml(write, e, encoding, qnames, None)
write("</" + tag + ">")
else:
write(" />")
if elem.tail:
write(ET._escape_cdata(elem.tail, encoding))
ET._serialize_xml = _serialize_xml
from collections import OrderedDict
class OrderedXMLTreeBuilder(ET.XMLTreeBuilder):
def _start_list(self, tag, attrib_in):
fixname = self._fixname
tag = fixname(tag)
attrib = OrderedDict()
if attrib_in:
for i in range(0, len(attrib_in), 2):
attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
return self._target.start(tag, attrib)
tree = ET.parse("example1.xml", OrderedXMLTreeBuilder())
tree.write("new-data.xml")
How do I get the xml sorted?