1

I'm trying to make Java code that would use Saxon xPath. I have 2 issues:

  1. I'm not very good at Java
  2. I'm not sure what the best way to convert net.sf.saxon.om.NodeInfo to String is.

Can someone please help? I know that there is some good sample code at http://www.saxonica.com/download/download_page.xml but it was not enough.

I saw a similar SO discussion, "XPath processor output as string". However, in this case I would like to use Saxon, and it uses NodeInfo.

<pre>
<!-- language: java --> 
public class helloSaxon {

    /**
     * Evaluates five sample XPath expressions against one in-memory XML
     * document and prints, for each, the collected result string and the
     * match flag. The "desired" comments show the output the author wants,
     * which is not necessarily what this version prints.
     */
    public static void main(String[] args) {
        final String xml = "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>";

        //I'm using the following XPath Tester for test scenarios
        //https://www.freeformatter.com/xpath-tester.html#ad-output

        // Test #1
        // desired xPathResult:      "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a>"
        // desired xPathResultMatch: true
        SaxonXPath evaluation = new SaxonXPath(xml, "/root/a");
        report(1, evaluation);

        // Test #2
        // desired xPathResult:      "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><a>#DDD#</a>"
        // desired xPathResultMatch: true
        evaluation.Reset(xml, "//a");
        report(2, evaluation);

        // Test #3
        // desired xPathResult:      "#BBB#"
        // desired xPathResultMatch: true
        evaluation.Reset(xml, "/root/a[1]/text()");
        report(3, evaluation);

        // Test #4
        // desired xPathResult:      ""
        // desired xPathResultMatch: false
        evaluation.Reset(xml, "/a/root/a/text()");
        report(4, evaluation);

        // Test #5
        // desired xPathResult:      "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>"
        // desired xPathResultMatch: true
        evaluation.Reset(xml, "/root");
        report(5, evaluation);
    }

    /** Prints the result string and the match flag of one evaluation. */
    private static void report(int testNumber, SaxonXPath evaluation) {
        String result = evaluation.getxPathResult();
        System.out.println("Test #" + testNumber + " xPathResult - " + result);
        Boolean matched = evaluation.getxPathResultMatch();
        System.out.println("Test #" + testNumber + " xPathResultMatch - " + matched);
    }

    /**
     * Holds one XML string and one XPath expression, and exposes the outcome
     * of evaluating the expression through Saxon's JAXP XPath implementation.
     */
    static class SaxonXPath {
        private String xml;
        private String xPathStatement;
        private String xPathResult;
        private Boolean xPathResultMatch;

        /** Stores both inputs and evaluates immediately (delegates to {@link #Reset}). */
        public SaxonXPath(String xml, String xPathStatement) {
            this.Reset(xml, xPathStatement);
        }

        /**
         * Replaces the inputs, clears the previous outcome (empty result,
         * {@code null} match flag) and runs {@link #Evaluate()}.
         */
        public void Reset(String xml, String xPathStatement) {
            this.xPathResultMatch = null;
            this.xPathResult = "";
            this.xPathStatement = xPathStatement;
            this.xml = xml;
            this.Evaluate();
        }

        /**
         * Registers Saxon's XPath factory, creates a fresh factory, parses the
         * stored XML, compiles the stored expression and evaluates it as a
         * node set. The match flag becomes {@code true} when at least one node
         * is returned and {@code false} when none is. Any exception is printed
         * and otherwise ignored, which leaves the flag at {@code null}.
         */
        public void Evaluate() {
            try {
                final String factoryKey = "javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON;
                System.setProperty(factoryKey, "net.sf.saxon.xpath.XPathFactoryImpl");

                XPathFactory factory = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
                XPath evaluator = factory.newXPath();

                SAXSource source = new SAXSource(new InputSource(new StringReader(this.xml)));
                Configuration saxonConfig = ((XPathFactoryImpl) factory).getConfiguration();
                DocumentInfo tree = saxonConfig.buildDocument(source);

                XPathExpression compiled = evaluator.compile(this.xPathStatement);
                List matches = (List) compiled.evaluate(tree, XPathConstants.NODESET);

                if (matches == null || matches.isEmpty()) {
                    this.xPathResultMatch = false;
                    return;
                }

                this.xPathResultMatch = true;
                for (Object match : matches) {
                    NodeInfo node = (NodeInfo) match;

                    //need to convert content of "node" to string
                    xPathResult += node.getStringValue();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /** Returns the string collected by the most recent evaluation. */
        public String getxPathResult() {
            return this.xPathResult;
        }

        /** Returns the match flag set by the most recent evaluation; may be {@code null}. */
        public Boolean getxPathResultMatch() {
            return this.xPathResultMatch;
        }
    }
}
</pre>

There would be the following inputs:

  1. XML as String
  2. xPath expression as String
    Outputs:
  3. xPath evaluation as String
  4. xPath result match as Boolean

I have also added some test examples in code comments so you could better understand what I'm trying to make.

Jeroen Steenbeeke
  • 3,884
  • 5
  • 17
  • 26
Peteris
  • 418
  • 2
  • 13

2 Answers

2

Firstly, I would recommend using the s9api interface rather than the JAXP XPath interface for this. There are a number of reasons, notably:

  • the JAXP interface is very geared to XPath 1.0, for example it only recognizes the data types string, number, boolean and node-set. XPath 2.0 has a much richer type system

  • the JAXP interface is rather tied to DOM as its object model, although it makes concessions to the possibility of using other models (and the Saxon implementation takes advantage of this by supporting NodeInfo, which is an implementation of XDM nodes)

  • the JAXP interface has very little type safety; it makes extensive use of Object as an argument and result type, and it makes no use of Java generics

  • Any portability advantages of using a standard API are rather spurious since (a) all known implementations other than Saxon only support XPath 1.0, and (b) the kinds of value that may be supplied to interfaces declared as accepting Object are different for each product.

Your code is creating a new XPathFactory every time an XPath expression is evaluated. Creating an XPathFactory is a very expensive operation since it involves searching the classpath and examining many different JAR files to see which one contains an appropriate XPath engine.

In addition your code is building the source document from scratch every time an XPath expression is evaluated. Again, this is very expensive.

Having said all that, returning strings and booleans using JAXP isn't very difficult. You just need to change the argument XPathConstants.NODESET that says what result type is expected, to XPathConstants.STRING or XPathConstants.BOOLEAN, and the evaluate() call will return a string or a boolean in place of a list of nodes. But if you wanted to return a date or a duration, you would be stuck because JAXP doesn't support that.

Michael Kay
  • 156,231
  • 11
  • 92
  • 164
  • Thank you @Michael.Kay for your good comments! :) The issue with XPathConstants.STRING is that in case of expression "/root" it returns: "#BBB##CCC##DDD#" however I would like to have: "#BBB##CCC# #DDD#" – Peteris Sep 11 '19 at 05:29
  • As far as I understand, I have to use NodeInfo in order to have tag names, namespaces, node content, etc. However, it seems that the issue with NodeInfo is that there are no "out of the box" functions to convert it to a String... – Peteris Sep 11 '19 at 05:38
  • 1
    If you want the serialized representation of a node as the result of your XPath, you can either return the node and then do the serialization in your calling application, or you can call the `serialize(node)` function within the XPath expression itself. – Michael Kay Sep 11 '19 at 07:32
  • Thank you @michael-kay! Unfortunately I can't return the node to the calling application and I have to do the serialization in the function. I tested with "xPathResult += net.sf.saxon.query.QueryResult.serialize(node);" and it returned the String that I wanted to achieve. Can you please comment on "net.sf.saxon.query.QueryResult.serialize(NodeInfo)"? Is it an expensive operation? Or maybe you meant something else by "serialize(node) function within the XPath expression itself"? – Peteris Sep 11 '19 at 11:40
  • 2
    Serializing within the calling application using `QueryResult.serialize()`, and serializing within the XPath expression using `fn:serialize()`, are likely to cost exactly the same. Yes, serializing a large document is an expensive operation and best avoided if you can. But sometimes of course it is necessary, e.g. if saving XML to filestore. – Michael Kay Sep 11 '19 at 13:06
1

I just wanted to add the edited code based on input from @MichaelKay. I'm still calling buildDocumentTree for every call, despite it being an expensive operation, because I will have different XMLs. I hope that others will also find it useful or will give nice comments in order to improve performance :)

import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import javax.xml.transform.sax.SAXSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import net.sf.saxon.Configuration;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.TreeInfo;
import net.sf.saxon.xpath.XPathFactoryImpl;
import org.xml.sax.InputSource;

public class helloSaxon {

    /**
     * Evaluates five sample XPath expressions against one in-memory XML
     * document and prints, for each, the serialized result and the match flag.
     *
     * <p>NOTE(review): the "expected" comments below were written for the
     * XML used in the original question. In the sample used here the first
     * {@code a} child is {@code bar:a} (bound to a namespace) and the
     * {@code version}/{@code encoding} attributes sit on {@code root}, so an
     * unprefixed {@code a} step should not select {@code bar:a} and the
     * actual output will differ from these comments. Confirm by running.
     */
    public static void main(String[] args) {
        String xml = "<root version = '1.0' encoding = 'UTF-8' xmlns:bar='http://www.smth.org/'><bar:a>#BBB#</bar:a><a>#CCC#</a><b><a>#DDD#</a></b></root>";

        //I'm using the following XPath Tester for test scenarios
        //https://www.freeformatter.com/xpath-tester.html#ad-output

        // Test #1
        // expected (question's XML): "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a>", match == true
        SaxonXPath xPathEvaluation = new SaxonXPath(xml, "/root/a");
        printOutcome(1, xPathEvaluation);

        // Test #2
        // expected (question's XML): "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><a>#DDD#</a>", match == true
        xPathEvaluation.Reset(xml, "//a");
        printOutcome(2, xPathEvaluation);

        // Test #3
        // expected (question's XML): "#BBB#", match == true
        xPathEvaluation.Reset(xml, "/root/a[1]/text()");
        printOutcome(3, xPathEvaluation);

        // Test #4
        // expected: "", match == false
        xPathEvaluation.Reset(xml, "/a/root/a/text()");
        printOutcome(4, xPathEvaluation);

        // Test #5
        // expected (question's XML): "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>", match == true
        xPathEvaluation.Reset(xml, "/root");
        printOutcome(5, xPathEvaluation);
    }

    /** Prints the serialized result and the match flag of one evaluation. */
    private static void printOutcome(int testNumber, SaxonXPath evaluation) {
        System.out.println("Test #" + testNumber + " xPathResult - " + evaluation.getxPathResult());
        System.out.println("Test #" + testNumber + " xPathResultMatch - " + evaluation.getxPathResultMatch());
    }

    /**
     * Evaluates an XPath expression against an XML string using Saxon's JAXP
     * XPath implementation and exposes the selected nodes as one serialized
     * string plus a flag telling whether anything matched.
     *
     * <p>The XPath factory is created once per instance and reused by every
     * {@link #Reset} call; the source document is re-parsed on every call.
     */
    static class SaxonXPath{
        private String xml;
        private String xPathStatement;
        private String xPathResult;
        private Boolean xPathResultMatch;
        private XPathFactory xPathFactory;
        private XPath xPath;

        /**
         * Registers and creates the Saxon XPath factory, then evaluates
         * {@code xPathStatement} against {@code xml}.
         *
         * @param xml            the XML document text
         * @param xPathStatement the XPath expression to evaluate
         * @throws IllegalStateException if the Saxon XPath factory cannot be
         *         created
         */
        public SaxonXPath(String xml, String xPathStatement){
            System.setProperty("javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON, "net.sf.saxon.xpath.XPathFactoryImpl");
            try {
                this.xPathFactory = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
            } catch (XPathFactoryConfigurationException e) {
                // Fail here with the real cause; carrying on would only hit a
                // NullPointerException on the next line.
                throw new IllegalStateException("Unable to create the Saxon XPathFactory", e);
            }
            this.xPath = this.xPathFactory.newXPath();
            this.Reset(xml, xPathStatement);
        }

        /**
         * Replaces the inputs, clears the previous outcome and evaluates the
         * expression.
         *
         * <p>On success the match flag is {@code true} and the result holds
         * the concatenated serialization of every selected node, or the flag
         * is {@code false} and the result is empty when nothing was selected.
         * If parsing, compiling, evaluating or serializing throws, the stack
         * trace is printed, the result stays empty and the flag stays
         * {@code null}; no partial result is published.
         *
         * @param xml            the XML document text
         * @param xPathStatement the XPath expression to evaluate
         */
        public void Reset(String xml, String xPathStatement){
            this.xml = xml;
            this.xPathStatement = xPathStatement;
            this.xPathResult = "";
            this.xPathResultMatch = null;
            try{
                InputSource inputSource = new InputSource(new StringReader(this.xml));
                SAXSource saxSource = new SAXSource(inputSource);
                Configuration config = ((XPathFactoryImpl) this.xPathFactory).getConfiguration();
                TreeInfo document = config.buildDocumentTree(saxSource);
                XPathExpression xPathExpression = this.xPath.compile(this.xPathStatement);

                // JAXP's evaluate() is declared to return Object; the original
                // code relies on it being a list of NodeInfo for NODESET.
                @SuppressWarnings("unchecked")
                List<NodeInfo> matches = (List<NodeInfo>) xPathExpression.evaluate(document, XPathConstants.NODESET);

                if (matches == null || matches.isEmpty()) {
                    this.xPathResultMatch = false;
                    return;
                }

                StringBuilder serialized = new StringBuilder();
                for (NodeInfo node : matches) {
                    serialized.append(net.sf.saxon.query.QueryResult.serialize(node));
                }
                // Publish only after every node serialized successfully.
                this.xPathResult = serialized.toString();
                this.xPathResultMatch = true;
            } catch(Exception e){
                e.printStackTrace();
            }
        }

        /** Returns the serialized nodes of the most recent evaluation; empty if none or on failure. */
        public String getxPathResult(){
            return this.xPathResult;
        }

        /** Returns {@code true}/{@code false} for match/no match, or {@code null} if the evaluation failed. */
        public Boolean getxPathResultMatch(){
            return this.xPathResultMatch;
        }
    }
}
Peteris
  • 418
  • 2
  • 13