0

Is there a way to query for two tag names in the DOM using `getElementsByTagName`?

I am trying to make my program parse an XML file, read the entries under two different tags, "Schools" & "Hospitals", and display the "id" attribute and the "text" element of each entry.

Is this possible?

My code is:

import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Parses {@code input2.xml} with the DOM API and prints, for every {@code <School>} element and
 * every {@code <Hospital>} element, its own {@code id} attribute together with the content of its
 * {@code <text>} child.
 *
 * <p>{@code getElementsByTagName} accepts a single tag name, so the two kinds of element are
 * fetched with two independent queries that share one printing routine.
 */
public class DomParse {

    /** Location of the XML document; a relative name is resolved against the working directory. */
    private static final String INPUT_FILE = "input2.xml";

    /**
     * Reads {@link #INPUT_FILE} and writes one entry per school, then one entry per hospital, to
     * standard output. Parser and I/O failures are reported on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse(INPUT_FILE);

            // The two queries are siblings, not nested: nesting the hospital loop inside the
            // school loop printed every hospital once per school, labelled with the school's id.
            printEntries(doc.getElementsByTagName("School"), "School ", "");
            // Query the individual <Hospital> elements rather than the <Hospitals> container so
            // that each entry carries its own id attribute.
            printEntries(doc.getElementsByTagName("Hospital"), "Hospitals ", "\n");
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the {@code id} attribute and {@code <text>} child content of each element in the list.
     *
     * @param entries elements to report, in document order
     * @param label prefix written before the id (for example {@code "School "})
     * @param trailer text appended after the content, before the line terminator
     */
    private static void printEntries(NodeList entries, String label, String trailer) {
        for (int i = 0; i < entries.getLength(); i++) {
            Node node = entries.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element entry = (Element) node;
            String id = entry.getAttribute("id");

            NodeList children = entry.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                // Only the direct <text> child is wanted; other element children such as
                // <scanPatterns> would otherwise contribute blank entries.
                if (child.getNodeType() == Node.ELEMENT_NODE && "text".equals(child.getNodeName())) {
                    System.out.println(label + id + ": " + "\n" + "\n" + child.getTextContent() + trailer);
                }
            }
        }
    }
}

The input file is:

<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
<school>
<Hospitals size="standard">
        <Hospital codeSet="base" included="true" analysis="nI" definedAt="scan" id="0">
            <text>New England Medical. Services only urgent procedures.</text>
            <scanPatterns>
                <scanPattern occurs="1" scanPatternId="45" definedAt="scan" id="0"/>
                <scanPattern occurs="1" scanPatternId="46" definedAt="scan" id="1"/>
            </scanPatterns>
        </Hospital>

        <Hospital codeSet="base" included="true" analysis="I" SchoolId="4" definedAt="scan" id="1">
            <text>Gibson Memorial. 20 miles away.</text>
            <scanPatterns>
                <scanPattern occurs="1" scanPatternId="42" definedAt="scan" id="0"/>
                <scanPattern occurs="1" scanPatternId="39" definedAt="scan" id="1"/>
            </scanPatterns>
        </Hospital>
        </Hospitals>
<Schools>
        <School definedAt="scan" id="0">
            <text>Craven County Middle School</text>
        </School>
        <School definedAt="scan" id="1">
            <text>WestLake Middle School</text>
        </School>
        </Schools>
</school>
  • 1
    navigating in the DOM is painful, take a look at XPath for an easier way to access values in the document http://stackoverflow.com/a/2811101/3215527 – wero May 12 '16 at 15:16
  • You could make a method which gets a `NodeList` as input, loops over the elements and prints the wanted values. Assuming the method's name is `printElements`, you could call it using `printElements(doc.getElementsByTagName("School"))` and `printElements(doc.getElementsByTagName("Hospital"))`. Besides: why are you printing all Hospitals for every School? – Tobias Brösamle May 12 '16 at 15:28

1 Answer

0

If you use XPath, this job will be far easier.

// Create an XPath evaluator from the default factory.
XPath xPath =  XPathFactory.newInstance().newXPath();
// "//" selects matching elements at any depth in the document.
String schoolExpression = "//School";
String hospitalExpression = "//Hospital";

Get a NodeList using the XPath expression, then iterate over the nodes and get the id and text content:

// NOTE(review): xmlDocument is not defined in this snippet — presumably the already-parsed DOM Document; confirm.
// Requesting XPathConstants.NODESET makes evaluate() return the matches as a node list, hence the cast.
NodeList nodeList = (NodeList) xPath.compile(schoolExpression).evaluate(xmlDocument, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
        System.out.println(nodeList.item(i).getAttributes().getNamedItem("id").getNodeValue());  // gets you your id
        System.out.println(nodeList.item(i).getTextContent());  // gets you the text content of the node and its descendants
}
kanghj91
  • 140
  • 1
  • 9