I have an "XLS" file that I downloaded from Netsuite ERP. The file root says ".XLS" but it's actually an XML file. I have a pandas script that will combine several XLS or XLSX files but pandas can't seem to handle this weird XLS/XML file type, so I have another script attempting to parse the XML data and save to XLS or XLSX. The below script doesn't seem to work however, as it results in "None". Could anyone point me in the right direction, either with my sample code, new code, or a new method for solving this odd XLS/XML parsing issue?
Thank you in advance!
XML Sample Code:
<?xml version="1.0" encoding="utf-16"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40">
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Author>NetSuite Reports</Author>
<LastAuthor>NetSuite Reports</LastAuthor>
<Company>NetSuite</Company>
</DocumentProperties>
<Styles>
<Style ss:ID="company">
<Alignment ss:Horizontal="Center" />
<Font ss:Size="12" ss:Bold="1" />
</Style>
<Style ss:ID="subcompany">
<Alignment ss:Horizontal="Center" />
<Font ss:Size="14" ss:Bold="1" />
</Style>
<Style ss:ID="error">
<Alignment ss:Horizontal="Center" />
<Interior ss:Color="#f0d0d0" ss:Pattern="Solid" />
<Font ss:Bold="1" />
</Style>
<Style ss:ID="header_l">
<Alignment ss:Horizontal="Left" />
<Font ss:Size="7" ss:Bold="1" />
<Interior ss:Color="#d0d0d0" ss:Pattern="Solid" />
</Style>
<Style ss:ID="header_r">
<Alignment ss:Horizontal="Right" />
<Font ss:Size="7" ss:Bold="1" />
<Interior ss:Color="#d0d0d0" ss:Pattern="Solid" />
</Style>
<Style ss:ID="header_c">
<Alignment ss:Horizontal="Center" />
<Font ss:Size="7" ss:Bold="1" />
<Interior ss:Color="#d0d0d0" ss:Pattern="Solid" />
</Style>
<Style ss:ID="scheckbox">
<Alignment ss:Vertical="Center" ss:Horizontal="Center" />
</Style>
<Style ss:ID="Default" ss:Name="Normal">
<Alignment ss:Vertical="Bottom" />
<Borders />
<Font ss:FontName="Arial" ss:Size="8" />
<Interior />
<NumberFormat />
<Protection />
</Style>
<Style ss:ID="s53">
<Alignment ss:Vertical="Center" ss:Horizontal="Left" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders>
<Border ss:Position="Top" ss:LineStyle="Dash" ss:Weight="1" ss:Color="#cccccc" />
</Borders>
</Style>
<Style ss:ID="s52">
<Alignment ss:Horizontal="Left" ss:Indent="1" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="0" ss:Italic="0" />
<Borders />
</Style>
<Style ss:ID="s51">
<Alignment ss:Vertical="Center" ss:Horizontal="Right" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="0" ss:Italic="0" />
<NumberFormat ss:Format=""€"#,##0.00" />
<Borders />
</Style>
<Style ss:ID="s50">
<Alignment ss:Vertical="Center" ss:Horizontal="Left" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders />
</Style>
<Style ss:ID="s58">
<Alignment ss:Horizontal="Left" ss:Indent="2" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders>
<Border ss:Position="Top" ss:LineStyle="Dash" ss:Weight="1" ss:Color="#cccccc" />
</Borders>
</Style>
<Style ss:ID="s54">
<Alignment ss:Vertical="Center" ss:Horizontal="Right" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<NumberFormat ss:Format=""€"#,##0.00" />
<Borders>
<Border ss:Position="Top" ss:LineStyle="Dash" ss:Weight="1" ss:Color="#cccccc" />
</Borders>
</Style>
<Style ss:ID="s59">
<Alignment ss:Horizontal="Left" ss:Indent="1" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders>
<Border ss:Position="Top" ss:LineStyle="Dash" ss:Weight="1" ss:Color="#cccccc" />
</Borders>
</Style>
<Style ss:ID="s56">
<Alignment ss:Horizontal="Left" ss:Indent="2" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders />
</Style>
<Style ss:ID="s57">
<Alignment ss:Horizontal="Left" ss:Indent="3" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="0" ss:Italic="0" />
<Borders />
</Style>
<Style ss:ID="s55">
<Alignment ss:Horizontal="Left" ss:Indent="1" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders />
</Style>
<Style ss:ID="s60">
<Alignment ss:Vertical="Center" ss:Horizontal="Left" />
<Font ss:FontName="Arial" ss:Size="8" ss:Color="#000000" ss:Bold="1" ss:Italic="0" />
<Borders>
<Border ss:Position="Top" ss:LineStyle="Dash" ss:Weight="1" ss:Color="#cccccc" />
</Borders>
</Style>
</Styles>
<Worksheet ss:Name="TrialBalance">
<Table>
<Row>
<Cell ss:StyleID="company" ss:MergeAcross="1">
<Data ss:Type="String">Parent Company</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="company" ss:MergeAcross="1">
<Data ss:Type="String">Company Holdings Inc. : Company A B.V.</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="subcompany" ss:MergeAcross="1">
<Data ss:Type="String">Trial Balance</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="subcompany" ss:MergeAcross="1">
<Data ss:Type="String">End of Feb 2020</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="subcompany" ss:MergeAcross="1">
<Data ss:Type="String" />
</Cell>
</Row>
<Row>
<Cell ss:StyleID="subcompany" ss:MergeAcross="1">
<Data ss:Type="String" />
</Cell>
</Row>
<Row>
<Cell ss:StyleID="header_l">
<Data ss:Type="String">Account</Data>
</Cell>
<Cell ss:StyleID="header_r" ss:MergeDown="0" ss:Index="2">
<Data ss:Type="String">Total</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="s50">
<Data ss:Type="String">10000 - CASH & CASH EQUIVALENTS</Data>
</Cell>
<Cell ss:StyleID="s51" />
</Row>
<Row>
<Cell ss:StyleID="s52">
<Data ss:Type="String">10101 - Bank - 9999 - Company A - EUR</Data>
</Cell>
<Cell ss:StyleID="s51">
<Data ss:Type="Number">1234567.01</Data>
</Cell>
</Row>
<Row>
<Cell ss:StyleID="s53">
<Data ss:Type="String">Total - 10000 - CASH & CASH EQUIVALENTS</Data>
</Cell>
<Cell ss:Formula="SUM(R[-1]C)" ss:StyleID="s54">
<Data ss:Type="Number">1234567.01</Data>
</Cell>
</Row>
</Table>
</Worksheet>
</Workbook>
Python Code Parsing XML to XLS:
import pandas as pd
import xml.etree.cElementTree as ET
tree = ET.parse(r"C:\Users\NAME\Documents\rootfolder\examplefile.xls")
root = tree.getroot()
def getvalueofnode(node):
""" return node text or None """
return node.text if node is not None else None
def main():
""" main """
parsed_xml = tree
dfcols = ['account', 'total']
df_xml = pd.DataFrame(columns=dfcols)
for node in parsed_xml.getroot():
account = node.attrib.get('Type="String"')
total = node.find('Type="Number"')
df_xml = df_xml.append(
pd.Series([account, getvalueofnode(total)], index=dfcols),
ignore_index=True)
print(df_xml)
main()
Python Parsing XML File Results:
account total
0 None None