0
parser = lxml.etree.XMLParser(strip_cdata=False)
    tree = lxml.etree.parse(file_name, parser)
    root = tree.getroot()
    for test in root.iter('job'):
        for stuff in test.findall('cpc'):
            stuff.text = ratestr
        #    test.remove(stuff)
    # tree.find('.//cpc').text = '0'
    tree.write("neuvoo_usa.xml")
    print ("Done\n")

    print ("Downloading USA Entire Feed adzuna")
    file_name = r'adzuna_usa.xml'
    print ("changingCPC\n")
    parser = lxml.etree.XMLParser(strip_cdata=False,huge_tree=True)
    tree = lxml.etree.parse(file_name, parser)
    root = tree.getroot()
    for test in root.iter('job'):
        for stuff in test.findall('cpc'):
            stuff.text = ratestr
        #    test.remove(stuff)
    # tree.find('.//cpc').text = '0'
    tree.write("adzuna_usa.xml")
    print ("Done changing\n")
    print ("merging \n")
    
    file_name = r'neuvoo_usa.xml'
    print ("neuvoo\n")
    parser = lxml.etree.XMLParser(strip_cdata=False)
    tree = lxml.etree.parse(file_name, parser)
    root = tree.getroot()

    file_name = r'adzuna_usa.xml'
    print ("adzuna\n")
    parser = lxml.etree.XMLParser(strip_cdata=False)
    tree1 = lxml.etree.parse(file_name, parser)
    root1 = tree1.getroot()

    for child in root1:
        root.append(child)
    tree.write("merged_usa.xml")
    print ("Files Merged....\n")

I have an XML file of about 1.1 GB that I want to parse, but because it is so large, parsing fails with a memory allocation error. Is there any way around this? I have tried every solution I could find, but none of them worked. Thanks in advance.

following is the error:

File "src\lxml\etree.pyx", line 3519, in lxml.etree.parse
  File "src\lxml\parser.pxi", line 1839, in lxml.etree._parseDocument
  File "src\lxml\parser.pxi", line 1865, in lxml.etree._parseDocumentFromURL
  File "src\lxml\parser.pxi", line 1769, in lxml.etree._parseDocFromFile
  File "src\lxml\parser.pxi", line 1163, in lxml.etree._BaseParser._parseDocFromFile
  File "src\lxml\parser.pxi", line 601, in lxml.etree._ParserContext._handleParseResultDoc
  File "src\lxml\parser.pxi", line 711, in lxml.etree._handleParseResult
  File "src\lxml\parser.pxi", line 640, in lxml.etree._raiseParseError
  File "adzuna_usa.xml", line 4263411
lxml.etree.XMLSyntaxError: Memory allocation failed : xmlSAX2Characters, line 4263411, column 5
Ahmad Ishaq
  • 11
  • 1
  • 5
  • Use `iterparse`. See https://stackoverflow.com/q/10855921/407651. See also https://stackoverflow.com/q/9856163/407651, https://stackoverflow.com/q/61813902/407651 – mzjn Jul 17 '20 at 17:20
  • I tried that, but it didn't work, because I have to change a tag and then merge two files afterwards. Is there any solution? – Ahmad Ishaq Jul 17 '20 at 17:29
  • OK, I edited the question. There are actually two XML files, and after changing the CPC tag to the specific value I have to merge both files. Please help if you can, thanks. If you have an alternative approach, that would be fine too. – Ahmad Ishaq Jul 17 '20 at 20:55

0 Answers0