1
{

public class XmlSplit {

public static void main(String [] args) throws Exception {
File input = new File("C:\\Users\\Edit5.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document doc = (Document) dbf.newDocumentBuilder().parse(input);
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xpath.evaluate("//transaction", doc,    XPathConstants.NODESET);
int itemsPerFile = 2000;
int fileNumber = 0;
Document currentDoc = (Document) dbf.newDocumentBuilder().newDocument();
Node rootNode;


rootNode = currentDoc.createElement("transactions");
File currentFile = new File(fileNumber+".xml");
for (int i=1; i <= nodes.getLength(); i++) {
    Node imported = currentDoc.importNode(nodes.item(i-1), true);
    rootNode.appendChild(imported);

    if (i % itemsPerFile == 0) {
        writeToFile(rootNode, currentFile);
        rootNode = currentDoc.createElement("transactions");
        currentFile = new File((++fileNumber)+"C:\\UsersEdit1.xml");
    }
    else
    {
            writeToFile(rootNode, currentFile);
    }
 }

}

private static void writeToFile(Node node, File file) throws Exception {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter("C:\\UsersEdit1.xml")));
}
}


}

Hi I am splitting an large XML using DOM parser but it is taking long time to split the XML .Can someone help me with this to do this using stax parser.Also it is not generating new file .That is also a problem .THanks In advance If someone can do this please help me .

Iamat8
  • 3,888
  • 9
  • 25
  • 35
  • How large is large? 50Kb or 50Gb? And how slow is slow? Giving us some numbers will help us assess whether scrapping your existing approach and trying something different is a sensible thing to do. – Michael Kay Oct 08 '15 at 15:53
  • The file size is 150 mb and it is giving out of memory heap error .and if we pass small size XML like for 50 mb it is taking more than half hour to complete . – Gaurav Gupta Oct 12 '15 at 07:18
  • I don't understand why it should take so long: there may be some problem here that is unrelated to the code you have shown us. Have you tried taking a Java CPU profile to find out where the time is going? – Michael Kay Oct 12 '15 at 09:12
  • I want this code to be with STAX parser . – Gaurav Gupta Oct 12 '15 at 10:24
  • can anyone please help me out with this code using stax parser . – Gaurav Gupta Oct 12 '15 at 13:29

1 Answers1

0

Here is the code for splitting your xml in vtd-xml...

import com.ximpleware.*;
import java.io.*;

public class splitter {
    public static void main(String[] s) throws VTDException, IOException {
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("input.xml", false))
            return;
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("//transaction");
        int i=0,j=0,k=0;
        File f = new File("transactionList"+k+".xml");
        FileOutputStream fos = new FileOutputStream(f);
        byte[] head="<transactions>\n".getBytes();
        byte[] tail="\n</transactions>".getBytes();
        fos.write(head);
        while((i=ap.evalXPath())!=-1){
            long l=vn.getElementFragment();
            fos.write(vn.getXML().getBytes(), (int)l, (int)(l>>32));
            j++;
            if ((j+1)%2000==0){
                k++;
                fos.write(tail);
                fos.close();
                f = new File("transactionList"+k+".xml");
                fos = new FileOutputStream(f);
                fos.write(head);
            }
        }
        fos.write(tail);
        fos.close();
    }
}
vtd-xml-author
  • 3,319
  • 4
  • 22
  • 30