I have to trasform an xml file into a data frame pandas. I have tryed in much mode but the result is the same: None, None ... What am I wrong? is another library better? Is it possible that it is because of my XML format? The xml file is of the type:
<Document xmlns="xxx/zzz/yyy">
<Header>
<DocumentName>GXXXXXXXXXX</DocumentName>
<DocumentType>G10</DocumentType>
<Version>2.0.0.0</Version>
<Created>2018-12-11T09:00:02.987777+00:00</Created>
<TargetProcessingDate>2019-02-11</TargetProcessingDate>
<Part>
<CurrentPage>1</CurrentPage>
<TotalPages>1</TotalPages>
</Part>
</Header>
<Body>
<Accounts>
<Account>
<Type>20WE</Type>
<OldType>19WE</OldType>
<Kids>
<Kid>
<Name>marc</Name>
<BirthDate>2000-02-06</BirthDate>
<Year>19</Year>
<Email>marc@xxx.com</Email>
</Kid>
</Kids>
</Account>
</Accounts>
</Body>
</Document>
one of tryed codes
import xml.etree.ElementTree as ET
import pandas as pd
class XML2DataFrame:
def __init__(self, xml_data):
self.root = ET.XML(xml_data)
def parse_root(self, root):
"""Return a list of dictionaries from the text and attributes of the
children under this XML root."""
return [parse_element(child) for child in root.getchildren()]
def parse_element(self, element, parsed=None):
""" Collect {key:attribute} and {tag:text} from thie XML
element and all its children into a single dictionary of strings."""
if parsed is None:
parsed = dict()
for key in element.keys():
if key not in parsed:
parsed[key] = element.attrib.get(key)
if element.text:
parsed[element.tag] = element.text
else:
raise ValueError('duplicate attribute {0} at element {1}'.format(key, element.getroottree().getpath (element)))
""" Apply recursion"""
for child in list(element):
self.parse_element(child, parsed)
return parsed
def process_data(self):
""" Initiate the root XML, parse it, and return a dataframe"""
structure_data = self.parse_root(self.root)
return pd.DataFrame(structure_data)
xml2df = XML2DataFrame(xml_data)
xml_dataframe = xml2df.process_data()
expected output
Type OldType Name BirthDate Year Email
20WE 19WE marc 2000-02-06 19 marc@xxx.com