1

I'm using XML files in Java for the first time and I need some help. I am trying to split an XML file into multiple XML files using Java.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<products>
    <product>
        <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
        <gtin>00027242816657</gtin>
        <price>2999.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple iPad 2 with Wi-Fi 16GB - iOS 5 - Black
        </description>
        <gtin>00885909464517</gtin>
        <price>399.0</price>
        <orderId>2343</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue
        </description>
        <gtin>00027242831438</gtin>
        <price>91.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple MacBook Air A 11.6" Mac OS X v10.7 Lion MacBook
        </description>
        <gtin>00885909464043</gtin>
        <price>1149.0</price>
        <orderId>2344</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Panasonic TC-L47E50 47" Smart TV Viera E50 Series LED
            HDTV</description>
        <gtin>00885170076471</gtin>
        <price>999.99</price>
        <orderId>2344</orderId>
        <supplier>Panasonic</supplier>
    </product>
</products>

and I'm trying to get three XML documents like:

 <?xml version="1.0" encoding="UTF-8"?>
<products>
        <product>
            <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
            <gtin>00027242816657</gtin>
            <price currency="USD">2999.99</price>
            <orderid>2343</orderid>
        </product>
        <product>
            <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue</description>
            <gtin>00027242831438</gtin>
            <price currency="USD">91.99</price>
            <orderid>2343</orderid>
        </product>
</products>

one for each supplier. How can I achieve this? Any help with this would be great.

  • what have you already tried so far? There are plenty of opportunities with Java for XML processing. Opportunities include marshalling/unmarshalling DOM model, Stream XML read/write, running an XSLT transformation, etc. – S. Pauk Mar 20 '15 at 12:11
  • i never use XSLT. how can i do? i need from this solving in 2 hours. you can help me with that? please? :( – Din Ionuț Valentin Mar 20 '15 at 12:52

7 Answers7

2

Make sure you change the path in "inputFile" to your file and also the output part:

StreamResult result = new StreamResult(new File("C:\xmls\" + supplier.trim() + ".xml"));

Here is your code.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Splits {@code resources/products.xml} into one XML file per supplier.
 *
 * <p>Every {@code /products/product} element is copied into
 * {@code resources/<supplier>.xml}, where {@code <supplier>} is the trimmed text
 * of the product's {@code <supplier>} child element.
 */
public class ExtractXml
{
    /**
     * Reads the input document, collects the distinct supplier names and writes
     * one output document per supplier.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        String inputFile = "resources/products.xml";

        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new File(inputFile));

        XPath xpath = XPathFactory.newInstance().newXPath();
        XPathExpression allProductsExpression = xpath.compile("/products/product");
        NodeList productNodes = (NodeList) allProductsExpression.evaluate(doc, XPathConstants.NODESET);

        // Collect each supplier only once (in document order); otherwise the same
        // output file would be rebuilt and rewritten for every product of that supplier.
        List<String> suppliers = new ArrayList<String>();
        for (int i = 0; i < productNodes.getLength(); ++i)
        {
            String supplier = supplierOf(productNodes.item(i));
            if (supplier.length() > 0 && !suppliers.contains(supplier))
            {
                System.out.println(supplier);
                suppliers.add(supplier);
            }
        }

        // One transformer is enough; it can be reused for every output document.
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");

        //Now we create the split XMLs
        for (String supplier : suppliers)
        {
            //We store the new XML file in supplierName.xml e.g. Sony.xml
            Document suppXml = dBuilder.newDocument();

            //we have to recreate the root node <products>
            Element root = suppXml.createElement("products");
            suppXml.appendChild(root);

            // Products are matched in Java rather than through a string-built XPath
            // predicate, so supplier names containing quotes cannot break the query.
            int matched = 0;
            for (int i = 0; i < productNodes.getLength(); ++i)
            {
                Node productNode = productNodes.item(i);
                if (supplier.equals(supplierOf(productNode)))
                {
                    // importNode makes a deep copy that is owned by the new document
                    root.appendChild(suppXml.importNode(productNode, true));
                    ++matched;
                }
            }

            System.out.println("Found " + matched +
                               " product(s) for supplier " + supplier);

            //At the end, we save the file XML on disk
            // NOTE(review): the supplier name comes straight from the input and is used
            // as a file name; sanitize it if the input is not trusted.
            StreamResult result = new StreamResult(new File("resources/" + supplier + ".xml"));
            transformer.transform(new DOMSource(suppXml), result);

            System.out.println("Done for " + supplier);
        }
    }

    /** Returns the trimmed text of the first {@code <supplier>} child of {@code product}, or "" if there is none. */
    private static String supplierOf(Node product)
    {
        for (Node child = product.getFirstChild(); child != null; child = child.getNextSibling())
        {
            if (child.getNodeType() == Node.ELEMENT_NODE && "supplier".equals(child.getNodeName()))
            {
                return child.getTextContent().trim();
            }
        }
        return "";
    }

}
slux83
  • 686
  • 9
  • 20
0

You can have a look here to see how to parse a XML document using DOM, in Java: DOM XML Parser Example

Here, how to write the new XML file(s): Create XML file using java

In addition you could study XPath to easily select your nodes: Java Xpath expression

If performance is not a concern, then once you have loaded your DOM and created your XPath, you can first retrieve all the suppliers in your XML document using the following XPath query:

//supplier/text()

you will get something like that:

Text='Sony'
Text='Apple'
Text='Sony'
Text='Apple'
Text='Panasonic'

Then I would put those results in an ArrayList or a similar collection. The second step is to iterate over that collection and, for each item, query the XML input document in order to extract all the nodes with that particular supplier:

/products/product[supplier='Sony'] 

of course in java you will have to build the last xpath query in a dynamic way:

String xpathQuery = "/products/product/[supplier='" + currentValue + "']

After that, you will get the list of nodes which match the supplier you specified. The next step would be constructing the new XML DOM and save it on a file.

Community
  • 1
  • 1
slux83
  • 686
  • 9
  • 20
  • i want to sort product by supplier. and i have no idea how to do this. you can get me some code for this please? or something like that... – Din Ionuț Valentin Mar 20 '15 at 12:48
  • i want to sort product by supplier. and i have no idea how to do this. you can get me some code for this please? or something like that... – Din Ionuț Valentin Mar 20 '15 at 12:49
  • How can I populate that ArrayList? For now I have this code: String expSup = "//supplier/text()"; String path = "myFile.xml"; ArrayList suppliers = new ArrayList(); Document xmlDocument = DocumentBuilderFactory.newInstance() .newDocumentBuilder().parse(path); XPath xPathSup = XPathFactory.newInstance().newXPath(); XPathExpression xPathExpression = xPathSup.compile(expSup) – Din Ionuț Valentin Mar 20 '15 at 13:43
  • how can i do an arraylist...last question...please...is emergency...:-s – Din Ionuț Valentin Mar 20 '15 at 14:36
0

A DOM parser will consume more memory. I prefer to use a SAX parser to read and write the XML.

Selva
  • 230
  • 3
  • 17
0

I like the approach of Xmappr (https://code.google.com/p/xmappr/) where you can use simple annotations:

first the root-element Products which simply holds a list of Product-instances

/**
 * Mapping class for the document root; it holds the list of {@link Product} entries.
 */
@RootElement
public class Products {

    // Annotated as an element; presumably bound to the repeated <product> children
    // by field name - confirm against the Xmappr documentation.
    @Element
    public List<Product> product;
}

Then the Product-class

/**
 * Mapping class for a single product entry. Every field is a public String
 * annotated with {@code @Element}; the field names mirror the child element
 * names used in the sample document (description, supplier, gtin, price, orderId).
 */
@RootElement
public class Product {

   // Free-text product description.
   @Element
   public String description;

   // Supplier name; this is the value the products are meant to be grouped by.
   @Element
   public String supplier;

   // Kept as a String, so leading zeros in the sample values are not lost.
   @Element
   public String gtin;

   // Kept as a String; no numeric parsing happens in this class.
   @Element
   public String price;

   @Element
   public String orderId;
}

And then you simply fetch the Product-instances from the Products:

/**
 * Reads {@code test.xml}, maps it to a {@link Products} instance with Xmappr and
 * prints the description of every product.
 *
 * @param args command-line arguments (unused)
 * @throws FileNotFoundException if {@code test.xml} cannot be opened
 */
public static void main(String[] args) throws FileNotFoundException {
    Reader reader = new FileReader("test.xml");
    try {
        Xmappr xm = new Xmappr(Products.class);
        Products products = (Products) xm.fromXML(reader);

        // fetch list of products
        List<Product> listOfProducts = products.product;

        // do sth with the products in the list
        for (Product product : listOfProducts) {
            System.out.println(product.description);
        }
    } finally {
        // Always release the file handle, even when the mapping fails.
        try {
            reader.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }
}

And then you can do whatever you want with the products (e.g. sorting them according to the supplier and writing them out to an XML file).

  • Exception in thread "main" java.lang.NullPointerException at org.xmappr.ConfigurationProcessor.readRootElementAnnotations(ConfigurationProcessor.java:106) at org.xmappr.Xmappr.initialize(Xmappr.java:300) at org.xmappr.Xmappr.fromXML(Xmappr.java:127) at XMLReader.Test(XMLReader.java:269) at XMLReader.main(XMLReader.java:127) – Din Ionuț Valentin Mar 20 '15 at 13:22
  • Is this the full stacktrace? Did you make any modifications on the code? – Christoph Burmeister Mar 20 '15 at 13:31
0

Consider this xml

<?xml version="1.0"?>
<SSNExportDocument xmlns="urn:com:ssn:schema:export:SSNExportFormat.xsd" Version="0.1" DocumentID="b482350d-62bb-41be-b792-8a9fe3884601-1" ExportID="b482350d-62bb-41be-b792-8a9fe3884601" JobID="464" RunID="3532468" CreationTime="2019-04-16T02:20:01.332-04:00" StartTime="2019-04-15T20:20:00.000-04:00" EndTime="2019-04-16T02:20:00.000-04:00">
    <MeterData MeterName="MUNI1-11459398" UtilDeviceID="11459398" MacID="00:12:01:fae:fe:00:d5:fc">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:06.214-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="5949.1000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>
    <MeterData MeterName="MUNI4-11460365" UtilDeviceID="11460365" MacID="00:11:01:bc:fe:00:d3:f9">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:11.082-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="136349.9000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>
</SSNExportDocument>

We can use JAXB which converts your xml tags to objects. Then we can play around with them.

// Unmarshal input.xml into the JAXB object model.
File xmlFile = new File("input.xml");
// NOTE(review): jaxbContext is only assigned here; it is assumed to be declared by the surrounding code.
jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
SSNExportDocument ssnExpDoc = (SSNExportDocument) jaxbUnmarshaller.unmarshal(xmlFile);

// Groups the meters by the part of their name before the first '-' (e.g. "MUNI1").
Map<String, List<MeterData>> meterMapper = new HashMap<String, List<MeterData>>();

for (MeterData mData : ssnExpDoc.getMeterData()) {
    String meterFullName = mData.getMeterName();

    // indexOf instead of split("-")[0]: split drops trailing empty strings, so a
    // name consisting only of dashes would produce an empty array and fail on [0].
    int dashIndex = meterFullName.indexOf('-');
    String groupKey = dashIndex < 0 ? meterFullName : meterFullName.substring(0, dashIndex);

    List<MeterData> meterDataList = meterMapper.get(groupKey);
    if (meterDataList == null) {
        meterDataList = new ArrayList<>();
        meterMapper.put(groupKey, meterDataList);
    }
    meterDataList.add(mData);
}

meterMapper contains tag names against list of objects

Then Marshall the contents using

       // Build a JAXB context for the SSNExportDocument root type.
       JAXBContext jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);

        // Create Marshaller
        Marshaller jaxbMarshaller = jaxbContext.createMarshaller();

        // Required formatting??
        // Enables JAXB_FORMATTED_OUTPUT and JAXB_FRAGMENT.
        // NOTE(review): JAXB_FRAGMENT presumably suppresses the XML declaration - confirm
        // against the Marshaller documentation before writing standalone files.
        jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
        jaxbMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
        //jaxbMarshaller.setProperty("com.sun.xml.bind.xmlDeclaration", Boolean.FALSE);

        // Print XML String to Console

        StringWriter sw = new StringWriter();

        // Write XML to StringWriter
        // NOTE(review): `employee` is not defined anywhere in the visible snippet; presumably
        // this should be the SSNExportDocument instance to write out - confirm.
        jaxbMarshaller.marshal(employee, sw);

        // Verify XML Content
        String xmlContent = sw.toString();
        System.out.println(xmlContent);
vbnr
  • 307
  • 2
  • 5
  • 14
0

Not a perfect solution, but it works in most cases. I had to play around with some string operations to make it work. Basically, this solution splits the given XML at a given element, forms sub-XMLs and writes those to a list.

/**
 * Streams {@code input.xml} with the StAX event API and, for every element whose
 * local name equals {@code elementSplitString} ("product"), builds a string made of
 * the header text captured before the first such element plus the element's own
 * serialized events. Each result is appended to a local list.
 *
 * <p>The list is neither returned nor printed by this method. Any {@code Throwable}
 * is caught and its stack trace printed; the input stream is closed in {@code finally}.
 *
 * <p>NOTE(review): {@code StringWriter}, {@code List} and {@code ArrayList} are used
 * unqualified, so the enclosing file must import them.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    java.io.File inputFile = new java.io.File("input.xml");
    // Local name of the element at which the document is split.
    String elementSplitString = "product";
    java.io.InputStream inputStream = null;

    try {
        

        inputStream = new java.io.BufferedInputStream(new java.io.FileInputStream(inputFile));

        javax.xml.stream.XMLInputFactory inputFactory = javax.xml.stream.XMLInputFactory.newInstance();
        javax.xml.stream.XMLOutputFactory outputFactory = javax.xml.stream.XMLOutputFactory.newInstance();
        javax.xml.stream.XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
        // Writer for the split element currently being collected; null while outside one.
        javax.xml.stream.XMLEventWriter writer = null;
        // Receives every event seen before the first split element (the "header").
        StringWriter parentXMLStringWriter = new StringWriter();
        javax.xml.stream.XMLEventWriter headerWriter = outputFactory.createXMLEventWriter(parentXMLStringWriter); 
        StringWriter stringWriter = null;
        // Local name of the most recent start/end element seen before the first split element.
        String lastReadEvent = "";
        boolean splitElementFound = false;
        List<StringBuilder> list = new ArrayList<StringBuilder>();
        while (reader.hasNext()) {
            javax.xml.stream.events.XMLEvent event = reader.nextEvent();
            
            
            switch(event.getEventType()) {
                case javax.xml.stream.XMLStreamConstants.START_ELEMENT:
                    javax.xml.stream.events.StartElement startElement = (javax.xml.stream.events.StartElement)event;
                    if (startElement.getName().getLocalPart().equals(elementSplitString)) {
                        // A split element starts: collect its events in a fresh buffer.
                        splitElementFound = true;
                        stringWriter = new StringWriter();
                        writer = outputFactory.createXMLEventWriter(stringWriter);
                        if (writer != null) writer.add(event);
                    } else if(writer != null)
                         writer.add(event);
                    
                    break;

                case javax.xml.stream.XMLStreamConstants.END_ELEMENT:
                    javax.xml.stream.events.EndElement endElement = (javax.xml.stream.events.EndElement)event;
                    if (endElement.getName().getLocalPart().equals(elementSplitString)) {
                        if (writer != null) writer.add(event);
                        
                        // NOTE(review): this close() is outside the null guard above, so it would
                        // throw if writer is null here (presumably possible with nested split
                        // elements) - confirm.
                        writer.close();
                        StringBuilder builder = new StringBuilder();
                        String parentXML = parentXMLStringWriter.toString();
                        // Header up to and including the first '>' that follows the first
                        // occurrence of lastReadEvent in the header text.
                        builder.append(parentXML.subSequence(0, parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 1));
                        builder.append(stringWriter.toString());
                        // Remainder of the header; the "+ 2" skips one extra character after
                        // that '>' - presumably a line break; confirm.
                        builder.append(parentXML.substring(parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 2));
                        list.add(builder);
                        writer = null;
                    }else if(writer != null)
                        writer.add(event);
                    break;

                default:
                    // Any other event is copied only while inside a split element.
                    if (writer != null) 
                        writer.add(event);
                    break;
            }
            // Until the first split element is seen, copy every event into the header.
            if(!splitElementFound) {
                if(event instanceof javax.xml.stream.events.StartElement)
                    lastReadEvent = ((javax.xml.stream.events.StartElement)event).getName().getLocalPart();
                else if(event instanceof javax.xml.stream.events.EndElement)
                    lastReadEvent = ((javax.xml.stream.events.EndElement)event).getName().getLocalPart();
                headerWriter.add(event);
            }else {
                // NOTE(review): runs on every iteration once a split element was found,
                // so close() is invoked repeatedly.
                headerWriter.close();
            }

        }
        
        headerWriter = null;
        reader.close();
        if (writer != null) writer.close();
    } catch(Throwable ex) {
        // Catches everything, including Errors, and only prints the stack trace.
        ex.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (java.io.IOException ex) {
                // do nothing
            }
        }
    }
} 
Sven Eberth
  • 3,057
  • 12
  • 24
  • 29
Ankit Jain
  • 47
  • 5
-1

An alternative to DOM would be JAXB, if you have the schema (XSD) for your XML dialect.

Florian Schaetz
  • 10,454
  • 5
  • 32
  • 58