5

I have a big, deeply nested XML file. All of its elements and attributes are going to become fields of my objects, and I'm building a list of such objects. I know how to do this with DOM, SAX and XMLPullParser, and it works fine, but I have a problem with the VTD parser: the ListView is empty after parsing. Below are part of the XML file and my code. Maybe someone knows what I am doing wrong.

<MedlineCitationSet>
<MedlineCitation Owner="NLM" Status="MEDLINE">
    <PMID Version="1">10540283</PMID>
    <DateCreated>
        <Year>1999</Year>
        <Month>12</Month>
        <Day>17</Day>
    </DateCreated>
    <Article PubModel="Print">
        <Journal>
            <ISSN IssnType="Print">0950-382X</ISSN>
            <JournalIssue CitedMedium="Print">
                <Volume>34</Volume>
                <Issue>1</Issue>
            </JournalIssue>...

My Android code:

try {
        articlesList = new ArrayList<>();

        VTDGen vtdGen = new VTDGen();
        vtdGen.setDoc(bytes);
        vtdGen.parse(false);

        AutoPilot ap = new AutoPilot();
        VTDNav vtdNav = vtdGen.getNav();

        int i = -1;

        ap.bind(vtdNav);
        ap.selectXPath("/MedlineCitationSet/MedlineCitation");

        while ((ap.evalXPath()) != -1) {

            articlesList.add(new Article());
            String year = null, day = null, month = null;
            i++;

            if (vtdNav.hasAttr("Owner"))
                articlesList.get(i).setOwner(vtdNav.toNormalizedString(vtdNav.getAttrVal("Owner")));
            if (vtdNav.hasAttr("Status"))
                articlesList.get(i).setStatus(vtdNav.toNormalizedString(vtdNav.getAttrVal("Status")));

            vtdNav.push();

            AutoPilot ap1 = new AutoPilot();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/PMID");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath()) != -1) {
                articlesList.get(i).setPMID(vtdNav.toNormalizedString(vtdNav.getText()));
                articlesList.get(i).setVersion(vtdNav.toNormalizedString(vtdNav.getAttrVal("Version")));
            }

            ap1.resetXPath();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath() != -1)) {

                vtdNav.push();

                AutoPilot ap1x = new AutoPilot();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Year");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    year = vtdNav.toNormalizedString(vtdNav.getText());
                }

                ap1x.resetXPath();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Month");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    month = vtdNav.toNormalizedString(vtdNav.getText());
                }

                ap1x.resetXPath();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Day");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    day = vtdNav.toNormalizedString(vtdNav.getText());
                }

                articlesList.get(i).setDateCreated(day + "-" + month + "-" + year);

                vtdNav.pop();
            }

            ap1.resetXPath();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/Article");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath()) != -1) {

                if (vtdNav.hasAttr("Print"))
                    articlesList.get(i).setPubModel(vtdNav.toNormalizedString(vtdNav.getAttrVal("Print")));

                vtdNav.push();

                AutoPilot ap2 = new AutoPilot();
                ap2.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal");
                ap2.bind(vtdNav);

                {
                    vtdNav.push();

                    AutoPilot ap2x = new AutoPilot();
                    ap2x.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/ISSN");
                    ap2x.bind(vtdNav);

                    while ((ap2x.evalXPath()) != -1) {
                        articlesList.get(i).setISSN(vtdNav.toNormalizedString(vtdNav.getText()));
                        articlesList.get(i).setIssnType(vtdNav.toNormalizedString(vtdNav.getAttrVal("IssnType")));
                    }

                    ap2x.resetXPath();
                    ap2x.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue");
                    ap2x.bind(vtdNav);

                    while ((ap2x.evalXPath()) != -1) {

                        articlesList.get(i).setCitedMedium(vtdNav.toNormalizedString(vtdNav.getAttrVal("CitedMedium")));

                        vtdNav.push();

                        AutoPilot ap3 = new AutoPilot();
                        ap3.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue/Volume");
                        ap3.bind(vtdNav);

                        while ((ap3.evalXPath()) != -1) {
                            articlesList.get(i).setVolume(vtdNav.toNormalizedString(vtdNav.getText()));
                        }

                        ap3.resetXPath();
                        ap3.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue/Issue");
                        ap3.bind(vtdNav);

                        while ((ap3.evalXPath()) != -1) {
                            articlesList.get(i).setIssue(vtdNav.toNormalizedString(vtdNav.getText()));
                        }

                        ap3.resetXPath();
                        vtdNav.pop();
                    }...

Thank you for any help!

  • What you are trying to do seems complicated, involving multiple XPath expressions nested one inside the other. Would it be possible for you to first describe conceptually what you're trying to do? – vtd-xml-author Sep 28 '15 at 23:51
  • I'm comparing the performance of 4 parsers: XMLPullParser, DOM, SAX and VTD-XML. There are almost no examples for the VTD-XML parser on the internet, and I'm having a big problem with it. – Agata Kobusinska Sep 29 '15 at 19:42
  • The VTD-XML 2.11 release contains quite a few examples dealing with simple use cases. There are examples on the VTD-XML web site itself, along with articles and references. The task you are performing is pretty complicated and seems like a job for a seasoned VTD-XML developer... Let me know how I can assist you... – vtd-xml-author Sep 29 '15 at 19:51
  • It seems to be much more difficult than with the other parsers. I'll try the sample below tomorrow and let you know. Thanks for the help! – Agata Kobusinska Sep 29 '15 at 20:04
  • It has its learning curve like anything else. But the goal of vtd-xml is to make it as simple as possible for simple tasks or complicated ones... especially when comparing with SAX and PULL – vtd-xml-author Sep 29 '15 at 20:55
  • Here is a paper comparing different kinds of XML apis in java http://sdiwc.net/digital-library/web-admin/upload-pdf/00000466.pdf – vtd-xml-author Sep 29 '15 at 23:15

1 Answer

0

Below is a code snippet helping you extract the relevant fields in the document. There are issues with your use of AutoPilot.

For example, I suggest you move all the selectXPath calls out of the while loops, because selectXPath is a relatively slow operation. Also, if the XML is deeply nested, you should consider calling VTDGen's selectLcDepth and setting it to 5; this helps improve navigation/XPath performance. Below is just a sample of what can be done. Also, for simple XPaths you can use VTDNav's native cursor API, which is handier...

Let me know if you run into any issues...

    // Walks every /MedlineCitationSet/MedlineCitation element and prints the
    // fields of interest. In the app, store each value on the current Article
    // (e.g. article.setOwner(...)) instead of printing it.
    VTDGen vtdGen = new VTDGen();
    // Set before parsing; intended to speed up navigation/XPath on deeply nested XML.
    vtdGen.selectLcDepth(5);
    // parseFile signals failure through its return value, so check it rather
    // than navigating a document that was never parsed.
    if (!vtdGen.parseFile("c:\\xml\\agata.xml", false)) {
        throw new IllegalStateException("could not parse c:\\xml\\agata.xml");
    }
    VTDNav vn = vtdGen.getNav();

    // Compile every XPath once, outside the loops: selectXPath is relatively slow.
    // The three inner expressions are relative, so each is evaluated against the
    // MedlineCitation the cursor is standing on.
    AutoPilot ap = new AutoPilot();
    AutoPilot ap1 = new AutoPilot();
    AutoPilot ap2 = new AutoPilot();
    AutoPilot ap3 = new AutoPilot();
    ap.bind(vn);
    ap1.bind(vn);
    ap2.bind(vn);
    ap3.bind(vn);
    ap.selectXPath("/MedlineCitationSet/MedlineCitation");
    ap1.selectXPath("PMID");
    ap2.selectXPath("DateCreated");
    ap3.selectXPath("Article");

    while (ap.evalXPath() != -1) {
        if (vn.hasAttr("Owner")) {
            System.out.println("Owner==>" + vn.toNormalizedString(vn.getAttrVal("Owner")));
        }
        if (vn.hasAttr("Status")) {
            System.out.println("Status==>" + vn.toNormalizedString(vn.getAttrVal("Status")));
        }

        // Each inner query is bracketed by push()/pop() so the cursor is back on
        // the MedlineCitation afterwards, and followed by resetXPath() so the
        // same AutoPilot can be re-evaluated for the next citation.
        vn.push();
        while (ap1.evalXPath() != -1) {
            if (vn.hasAttr("Version")) {
                System.out.println("Version==>" + vn.toNormalizedString(vn.getAttrVal("Version")));
            }
            System.out.println("PMID==>" + vn.toNormalizedString(vn.getText()));
        }
        ap1.resetXPath();
        vn.pop();

        vn.push();
        while (ap2.evalXPath() != -1) {
            // toElement returns false (cursor unchanged) when the child is absent;
            // only step back to the parent if we actually moved down.
            if (vn.toElement(VTDNav.FIRST_CHILD, "Year")) {
                System.out.println("Year==>" + vn.toNormalizedString(vn.getText()));
                vn.toElement(VTDNav.PARENT);
            }
            if (vn.toElement(VTDNav.FIRST_CHILD, "Month")) {
                System.out.println("Month==>" + vn.toNormalizedString(vn.getText()));
                vn.toElement(VTDNav.PARENT);
            }
            if (vn.toElement(VTDNav.FIRST_CHILD, "Day")) {
                System.out.println("Day==>" + vn.toNormalizedString(vn.getText()));
                vn.toElement(VTDNav.PARENT);
            }
        }
        ap2.resetXPath();
        vn.pop();

        vn.push();
        while (ap3.evalXPath() != -1) {
            if (vn.hasAttr("PubModel")) {
                System.out.println("PubModel==>" + vn.toNormalizedString(vn.getAttrVal("PubModel")));
            }
        }
        ap3.resetXPath();
        vn.pop();
    }
vtd-xml-author
  • 3,319
  • 4
  • 22
  • 30