1

I am trying to parse an XML document in Python with lxml and ElementTree, but it doesn't work because of the namespaces.

I tried XPath but had no luck. Also, how can I convert an XML document to UTF-8? At the moment I have to add to the XML for it to parse.

data = """<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
<![CDATA[<?xml version='1.0' encoding='UTF-8'?>
<soapenv:Header>
<messageHeader:messageHeader xmlns:messageHeader="http://www.xyx.co.nz/ismm/common/messageHeader/v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="messageHeader:MessageHeader">
<messageHeader:application>THOM</messageHeader:application>
<messageHeader:transactionId>BVCQWAC</messageHeader:transactionId>
<messageHeader:correlationId>1771518</messageHeader:correlationId>
<messageHeader:timeStamp>2016-11-18T20:41:16</messageHeader:timeStamp>
</messageHeader:messageHeader>
</soapenv:Header>
<soapenv:Body>
<submitSupplierPartner xmlns:customerBill="http://www.xyzaaa.com/ismm/common/customerBill/v1" xsi:type="messaging_supplierPartner:SubmitSupplierPartner">
<logisticsOrder>
<interactionDateTime>2016-11-18T20:41:16</interactionDateTime>
<businessInteractionRole xsi:type="bi:PartyInteractionRole">
<interactionRole>Customer</interactionRole>
<partyRole xsi:type="customer:Customer">
<contactMedium xsi:type="party:DeliveryContact">
.....
.....

# SOAP envelope namespace: the raw URI, its "{uri}" prefix form, and a
# prefix -> URI map.
namespace = "http://schemas.xmlsoap.org/soap/envelope/"
namespace_c = "{%s}" % namespace
NSMAP = dict(soapenv=namespace)
root = lxml.etree.fromstring(data)

# Debug aid: list every tag in the tree.
# for i, element in enumerate(root.getiterator()):
#     print(element.tag)

# Header: print the text of each correlationId under messageHeader.
header_ns = {'messageHeader': 'http://www.xyx.co.nz/ismm/common/messageHeader/v1'}
header_hits = root.xpath(
    '//messageHeader:messageHeader/messageHeader:correlationId',
    namespaces=header_ns,
)
for node in header_hits:
    print(node.text)

# Body: print the direct text of each submitSupplierPartner element.
body_ns = {"customerBill": "http://www.xyzaaa.com/ismm/common/customerBill/v1"}
body_hits = root.xpath('//submitSupplierPartner', namespaces=body_ns)
for node in body_hits:
    print(node.text)
Vivek
  • 31
  • 4
  • did you read/try http://stackoverflow.com/questions/14853243/parsing-xml-with-namespace-in-python-via-elementtree?rq=1 or http://stackoverflow.com/questions/5572247/how-to-find-xml-elements-via-xpath-in-python-in-a-namespace-agnostic-way?rq=1? – Gerrit Verhaar Dec 03 '16 at 11:51

1 Answers1

0

It worked for me.

`data` is the XML.

    # Parse the XML text held in `data`; `root` is the document's root element.
    root = lxml.etree.fromstring(data)

# Order ID / uniqueCreatorId.
# The body elements in the sample shown carry no namespace prefix, so
# plain (un-prefixed) XPath steps are used for them.
records = root.xpath('//submitSupplierPartner/logisticsOrder/orderId')
for record in records:
    orderID = record.text
    print(orderID)

# SIM and device IDs.
hardwareID = [
    record.text
    for record in root.xpath(
        '//submitSupplierPartner/logisticsOrder/resourceOrderItem/resourceSpecification/ID'
    )
]
print(hardwareID)

# Number of items for shipping.
noOfItems = len(hardwareID)
print("Total items for shipping are :")
print(noOfItems)

# SIM and device skuNumber values.
hardwaresku = [
    record.text
    for record in root.xpath(
        '//submitSupplierPartner/logisticsOrder/resourceOrderItem/resourceSpecification/skuNumber'
    )
]
print(hardwaresku)

# SIM and device itemId values.
hardwareitemID = [
    record.text
    for record in root.xpath(
        '//submitSupplierPartner/logisticsOrder/resourceOrderItem/itemId'
    )
]
print(hardwareitemID)

# Correlation ID / Rom.
# The header elements are namespace-qualified, so the prefix must be mapped
# to exactly the URI declared on the messageHeader element in the document
# (".../www.xyx.co.nz/..."); a URI that differs by even one character
# makes the query match nothing.
records = root.xpath(
    '//messageHeader:messageHeader/messageHeader:correlationId/text()',
    namespaces={"messageHeader": "http://www.xyx.co.nz/ismm/common/messageHeader/v1"},
)
for record in records:
    correlationID = record
    print(record)
Community
  • 1
  • 1
Vivek
  • 31
  • 4