# Parse the feed named by file_name into an in-memory tree. strip_cdata=False
# asks lxml to keep CDATA sections rather than turning them into plain text.
parser = lxml.etree.XMLParser(strip_cdata=False)
tree = lxml.etree.parse(file_name, parser)
root = tree.getroot()

# Every <cpc> that is a direct child of any <job> element gets ratestr as text.
cpc_nodes = (
    cpc
    for job in root.iter('job')
    for cpc in job.findall('cpc')
)
for cpc_node in cpc_nodes:
    cpc_node.text = ratestr

# Serialize the modified tree to the neuvoo output file.
tree.write("neuvoo_usa.xml")
print("Done\n")
print("Downloading USA Entire Feed adzuna")
file_name = r'adzuna_usa.xml'
print("changingCPC\n")

# huge_tree=True lifts libxml2's default limits on tree depth and text length;
# strip_cdata=False keeps CDATA sections instead of converting them to text.
# NOTE(review): the whole document is built in memory here; if the feed does
# not fit, lxml.etree.iterparse is the streaming alternative - confirm need.
parser = lxml.etree.XMLParser(strip_cdata=False, huge_tree=True)
tree = lxml.etree.parse(file_name, parser)
root = tree.getroot()

# Overwrite the text of each <cpc> found directly under a <job> with ratestr.
cpc_nodes = (
    cpc
    for job in root.iter('job')
    for cpc in job.findall('cpc')
)
for cpc_node in cpc_nodes:
    cpc_node.text = ratestr

# The result is written back over the same file that was just parsed.
tree.write("adzuna_usa.xml")
print("Done changing\n")
print("merging \n")

# Merge step: load both rewritten feeds, move the adzuna entries under the
# neuvoo root, and write the combined document to merged_usa.xml.
#
# Both parsers use huge_tree=True so they match the parser used when
# adzuna_usa.xml was rewritten earlier in this script. Without it, libxml2's
# default limits on very deep trees and very long text content apply again
# when the same large file is re-read here.
# NOTE(review): both documents are held in memory at the same time; if that
# does not fit, a streaming approach (lxml.etree.iterparse) would be needed.
file_name = r'neuvoo_usa.xml'
print("neuvoo\n")
parser = lxml.etree.XMLParser(strip_cdata=False, huge_tree=True)
tree = lxml.etree.parse(file_name, parser)
root = tree.getroot()

file_name = r'adzuna_usa.xml'
print("adzuna\n")
parser = lxml.etree.XMLParser(strip_cdata=False, huge_tree=True)
tree1 = lxml.etree.parse(file_name, parser)
root1 = tree1.getroot()

# Append each top-level child of the adzuna root to the neuvoo root.
for child in root1:
    root.append(child)

tree.write("merged_usa.xml")
print("Files Merged....\n")
I have an XML file of about 1.1 GB that I want to parse, but because it is so large the parser fails with a memory allocation error. Is there any way around this? I have tried every solution I could find, but none worked. Thanks in advance.
The following is the error:
File "src\lxml\etree.pyx", line 3519, in lxml.etree.parse
File "src\lxml\parser.pxi", line 1839, in lxml.etree._parseDocument
File "src\lxml\parser.pxi", line 1865, in lxml.etree._parseDocumentFromURL
File "src\lxml\parser.pxi", line 1769, in lxml.etree._parseDocFromFile
File "src\lxml\parser.pxi", line 1163, in lxml.etree._BaseParser._parseDocFromFile
File "src\lxml\parser.pxi", line 601, in lxml.etree._ParserContext._handleParseResultDoc
File "src\lxml\parser.pxi", line 711, in lxml.etree._handleParseResult
File "src\lxml\parser.pxi", line 640, in lxml.etree._raiseParseError
File "adzuna_usa.xml", line 4263411
lxml.etree.XMLSyntaxError: Memory allocation failed : xmlSAX2Characters, line 4263411, column 5