1

My question is: how can I get the child nodes from an RDF file using XPath?

Here you can see this set of nodes used multiple times:

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xml:base="http://www.gutenberg.org/"
  xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
  xmlns:cc="http://web.resource.org/cc/"
  xmlns:dcam="http://purl.org/dc/dcam/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:marcrel="http://id.loc.gov/vocabulary/relators/"
  xmlns:dcterms="http://purl.org/dc/terms/"
>
  <pgterms:ebook rdf:about="ebooks/45916">
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/45916/45916-h.zip">
        <dcterms:format>
          <rdf:Description rdf:nodeID="N8bcaca7a1c7d421bb84e17512209a18e">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/html; charset=iso-8859-1</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nf2b3fd0a0a8846708ffa66bef504ceb3">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T09:02:38</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">651365</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.kindle.noimages">
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:22.432333</dcterms:modified>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1123982</dcterms:extent>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nd99359d8b38946be9050e90d512f195b">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/x-mobipocket-ebook</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:creator>
      <pgterms:agent rdf:about="2009/agents/1609">
        <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/August_Strindberg"/>
        <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1912</pgterms:deathdate>
        <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1849</pgterms:birthdate>
        <pgterms:alias>Strindberg, Johan August</pgterms:alias>
        <pgterms:name>Strindberg, August</pgterms:name>
      </pgterms:agent>
    </dcterms:creator>
    <dcterms:publisher>Project Gutenberg</dcterms:publisher>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.txt.utf-8">
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nbbecde9fae7f49d3a61c4636a3f23b3f">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:13.886845</dcterms:modified>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">644491</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.epub.images">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">410485</dcterms:extent>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nf57694627093478cb2f1b79fe569b0bb">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:15.113789</dcterms:modified>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:title>Inferno Legenden</dcterms:title>
    <marcrel:trl>
      <pgterms:agent rdf:about="2009/agents/26434">
        <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1873</pgterms:birthdate>
        <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1951</pgterms:deathdate>
        <pgterms:name>Schering, Emil</pgterms:name>
      </pgterms:agent>
    </marcrel:trl>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/45916/45916-8.zip">
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nd11587d6f2c54ed588554457e438f745">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/zip</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">252086</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T09:02:36</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nde9a69d5a10a4eac8bc341fbd6b284e2">
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=iso-8859-1</rdf:value>
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.epub.noimages">
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="Nc8d0a845bc934efb9d272657cb764850">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/epub+zip</rdf:value>
          </rdf:Description>
        </dcterms:format>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:15.623745</dcterms:modified>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">272322</dcterms:extent>
      </pgterms:file>
    </dcterms:hasFormat>
    <pgterms:downloads rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">0</pgterms:downloads>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/45916/45916-8.txt">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">637384</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T09:02:34</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N7aa2a69d1003438ba4003eb259bd35ac">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/plain; charset=iso-8859-1</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/files/45916/45916-h/45916-h.htm">
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T09:02:36</dcterms:modified>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">666476</dcterms:extent>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N8eac5ace146f40b692be7124998929f6">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">text/html; charset=iso-8859-1</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.kindle.images">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1362365</dcterms:extent>
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:19.053536</dcterms:modified>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N21c75eb3a0324793b23f0f2da05987d8">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/x-mobipocket-ebook</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:rights>Public domain in the USA.</dcterms:rights>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/45916.qioo">
        <dcterms:isFormatOf rdf:resource="ebooks/45916"/>
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">314173</dcterms:extent>
        <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-08T22:26:14.223837</dcterms:modified>
        <dcterms:format>
          <rdf:Description rdf:nodeID="N2042f4d0e515438f86a835a107f3069f">
            <dcam:memberOf rdf:resource="http://purl.org/dc/terms/IMT"/>
            <rdf:value rdf:datatype="http://purl.org/dc/terms/IMT">application/x-qioo-ebook</rdf:value>
          </rdf:Description>
        </dcterms:format>
      </pgterms:file>
    </dcterms:hasFormat>
    <dcterms:type>
      <rdf:Description rdf:nodeID="N23936464382a478dbd492861f3ddcec0">
        <rdf:value>Text</rdf:value>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
      </rdf:Description>
    </dcterms:type>
    <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2014-06-08</dcterms:issued>
    <dcterms:language>
      <rdf:Description rdf:nodeID="Ne8641b6eba6e463c8dd983956a243179">
        <rdf:value rdf:datatype="http://purl.org/dc/terms/RFC4646">de</rdf:value>
      </rdf:Description>
    </dcterms:language>
    <dcterms:license rdf:resource="license"/>
  </pgterms:ebook>
  <cc:Work rdf:about="">
    <cc:license rdf:resource="http://www.gnu.org/licenses/gpl.html"/>
  </cc:Work>
  <rdf:Description rdf:about="http://en.wikipedia.org/wiki/August_Strindberg">
    <dcterms:description>Wikipedia</dcterms:description>
  </rdf:Description>
</rdf:RDF>

What I want is to get the value <pgterms:name>Strindberg, August</pgterms:name> from all these nodes. The main problem is that the <dcterms:creator> element changes from one RDF file to another, so I want to deal with it dynamically.

my code is something like this:

// Load the RDF/XML document from disk as a SimpleXML tree.
$dom = simplexml_load_file('my.rdf');
// Select pgterms:name along one fixed element path:
// rdf:RDF -> pgterms:ebook -> dcterms:creator -> pgterms:agent -> pgterms:name.
// Because dcterms:creator is hard-coded here, a pgterms:name nested under a
// different property (e.g. marcrel:trl in the sample document) is not matched.
$title = $dom->xpath('//rdf:RDF/pgterms:ebook/dcterms:creator/pgterms:agent/pgterms:name');
// NOTE(review): xpath() presumably returns an array of matching elements rather
// than a string, so this echo would not print the name itself — confirm against
// the PHP manual.
echo $title;

Any help would be appreciated.

Thank You.

  • The short answer is that you really shouldn't. "Solutions" based on the RDF/XML serialization of an RDF graph are very brittle, because the same RDF graph can be serialized as many different RDF/XML documents. It's different XML, but the same RDF. See, for instance [my answer](http://stackoverflow.com/a/17052385/1281433) to [How to access OWL documents using XPath in Java?](http://stackoverflow.com/q/17036871/1281433). – Joshua Taylor Jun 11 '14 at 11:30
  • That said, if you really really want to use XPath to get your information, then the accepted answer to that question may help you. – Joshua Taylor Jun 11 '14 at 11:31
  • But to stay within PHP, you might take a look at some of the libraries out there (easyrdf comes up if I search for php and sparql). Then you'd load the data, write a simple SPARQL query, and get your results. – Joshua Taylor Jun 11 '14 at 11:34
  • @JoshuaTaylor thank you very much for the help. so apart from this thing how to write simple SPARQL query to get the results as i want? – adfsadasdsad Jun 11 '14 at 11:38
  • You haven't shown us enough of the data to make that clear yet. Can you show a complete sample of the data? – Joshua Taylor Jun 11 '14 at 11:39
  • what do you suggest then? i have tried using easyrdf but it did not helped much to me. – adfsadasdsad Jun 11 '14 at 11:39
  • Ok sure please. let me show you. – adfsadasdsad Jun 11 '14 at 11:41
  • 1
    I haven't used easyrdf, so I can't say too much about it (although there are other questions on Stack Overflow about it). If you can show the data though, we can come up with the SPARQL query. – Joshua Taylor Jun 11 '14 at 11:41
  • 1
    i have posted the RDF file. and edit my question as well. once again thank you please. – adfsadasdsad Jun 11 '14 at 11:45
  • @adfsadasdsad If this question was helpful, you should accept the answer (and upvote it if you wish). Click on the checkmark on the left of the answer. – helderdarocha Jun 15 '14 at 04:22

1 Answer

3

The short answer is that you really shouldn't try to access the RDF with XPath. "Solutions" based on the RDF/XML serialization of an RDF graph are very brittle, because the same RDF graph can be serialized as many different RDF/XML documents. It's different XML, but the same RDF. See, for instance, [my answer](http://stackoverflow.com/a/17052385/1281433) to [How to access OWL documents using XPath in Java?](http://stackoverflow.com/q/17036871/1281433) If you insist, though, the accepted answer to that question may help you. I'd suggest that instead you use dedicated RDF tools.

At the moment, I can't help much with the PHP side of things, although it appears that there's a library called EasyRDF that may let you run SPARQL queries against your data. Coming up with the SPARQL query I can help you with. RDF is a graph-based data representation. The fundamental "thing" is the triple, which is just a three-tuple of the form (subject, predicate, object). We treat that as a directed edge from subject to object, labeled by predicate.

RDF/XML is just one representation of it. It's handy because there are so many XML processing tools, but it's inconvenient because it doesn't make the triples very clear, and it's not easy to read as plain text, or to write by hand. If we convert your data to N-Triples, which is a format that just puts one triple per line, it looks like this (just a part of it):

_:BX2D39ae9d40X3A1468ac2fcd1X3AX2D7ff9 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "text/plain; charset=iso-8859-1"^^<http://purl.org/dc/terms/IMT> .
_:BX2D39ae9d40X3A1468ac2fcd1X3AX2D7ff9 <http://purl.org/dc/dcam/memberOf> <http://purl.org/dc/terms/IMT> .
<http://www.gutenberg.org/2009/agents/1609> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.gutenberg.org/2009/pgterms/agent> .
<http://www.gutenberg.org/2009/agents/1609> <http://www.gutenberg.org/2009/pgterms/webpage> <http://en.wikipedia.org/wiki/August_Strindberg> .
<http://www.gutenberg.org/2009/agents/1609> <http://www.gutenberg.org/2009/pgterms/deathdate> "1912"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://www.gutenberg.org/2009/agents/1609> <http://www.gutenberg.org/2009/pgterms/birthdate> "1849"^^<http://www.w3.org/2001/XMLSchema#integer> .
<http://www.gutenberg.org/2009/agents/1609> <http://www.gutenberg.org/2009/pgterms/alias> "Strindberg, Johan August" .
<http://www.gutenberg.org/2009/agents/1609> <http://www.gutenberg.org/2009/pgterms/name> "Strindberg, August" .

That's very easy to write, but it's hard to read, and it's hard to see the graph structure. The Turtle serialization is very nice because it's easy to read and write, and it makes the graph structure more apparent, and it's very similar to the SPARQL query language syntax. The part about August Strindberg in Turtle is:

<http://www.gutenberg.org/2009/agents/1609>
        a                  pgterms:agent ;
        pgterms:alias      "Strindberg, Johan August" ;
        pgterms:birthdate  1849 ;
        pgterms:deathdate  1912 ;
        pgterms:name       "Strindberg, August" ;
        pgterms:webpage    <http://en.wikipedia.org/wiki/August_Strindberg> .

Now, it sounds like what you've actually got is one RDF file per ebook, and you're looking for the creator information about the ebook. Here's a query that will get the pgterms:name property of each author for each ebook in the document. Of course, if you expect there to be only one ebook description in the file, you could select just the name (i.e., select ?name where …) instead of select ?ebook ?name where ….

# Namespace prefixes for the Dublin Core terms and Project Gutenberg vocabularies.
prefix dcterms: <http://purl.org/dc/terms/>
prefix pgterms: <http://www.gutenberg.org/2009/pgterms/>

# For every resource typed as pgterms:ebook, follow dcterms:creator to the
# creator resource and return that creator's pgterms:name.
select ?ebook ?name where {
  ?ebook a pgterms:ebook ; 
         dcterms:creator ?creator .
  ?creator pgterms:name ?name .
}
------------------------------------------------------------------
| ebook                                   | name                 |
==================================================================
| <http://www.gutenberg.org/ebooks/45916> | "Strindberg, August" |
------------------------------------------------------------------

Now, it's pretty clear that this data is coming from Project Gutenberg, in which case you may also find the question "why sparql query below do not return cartesian product" useful. It's got some more examples of SPARQL queries against Project Gutenberg data. It's also got some discussion about the differences between the new and the legacy RDF representations of the data, but it looks like you're already using the new representation, so that's not as important. In fact, the final query in that question is similar to this one, and uses property paths, which are actually kind of like XPaths, and sort of like regular expressions. You can simplify the query above using property paths as:

# Namespace prefixes for the Dublin Core terms and Project Gutenberg vocabularies.
prefix dcterms: <http://purl.org/dc/terms/>
prefix pgterms: <http://www.gutenberg.org/2009/pgterms/>

# The property path dcterms:creator/pgterms:name walks from the ebook to its
# creator and on to the creator's name in a single triple pattern, so no
# intermediate ?creator variable is needed.
select ?ebook ?name where {
  ?ebook a pgterms:ebook ; 
         dcterms:creator/pgterms:name ?name .
}
Community
  • 1
  • 1
Joshua Taylor
  • 84,998
  • 9
  • 154
  • 353
  • Thank you very very much for this great explanation mate. what if, if the nodes continously changes in each RDF file.??? how we can manage this?? – adfsadasdsad Jun 11 '14 at 12:06
  • Suppose in next RDF file. its not creator, it would be `marcrel:ill` or `marcrel:edt`. how can we treat them as well? – adfsadasdsad Jun 11 '14 at 12:07
  • moreover, cant we skip this section using `//` sort of thing? – adfsadasdsad Jun 11 '14 at 12:08
  • Is it *just* the values of `pgterms:name` that you want? If that's the case, and you don't care about where they're coming from, you can just do `select ?name where { ?something pgterms:name ?name }`, or even `select ?name where { [ pgterms:name ?name ] }` (an abbreviated form of the same thing). – Joshua Taylor Jun 11 '14 at 12:56
  • Thank you very much and that will give the desired result? as `pgterms:name` is the child node at 3rd level u know. – adfsadasdsad Jun 11 '14 at 13:18
  • Yes i want this result, moreover, where and how this query is used? i mean how i can use this query in my PHP code? – adfsadasdsad Jun 11 '14 at 13:19
  • my last question. please see `pgterms:name` repeats 2 times in this RDF file. means, 1- ` Strindberg, August ` and 2- ` Schering, Emil ` so how i have values of both?? please explain. – adfsadasdsad Jun 11 '14 at 13:22
  • please reply me on thrice questions. – adfsadasdsad Jun 11 '14 at 13:23
  • Just as a general note about Stack Overflow etiquette, since it appears that you're still a newer user. Comments like "anyone please? I need this get done." and "please reply me on thrice questions" come across as somewhat demanding and impatient. (From some of the grammar, I'm guessing that English might not be your first language, so those comments might not actually be demanding and impatient. People may still read them that way and respond negatively, however.) – Joshua Taylor Jun 11 '14 at 13:55
  • As I explained in my answer, RDF is *graph based*, not *tree based*, so there's no concept of "3rd level deep" (even though there is in the XML serialization of the RDF graph). The query pattern `?something pgterms:name ?name` just searches for triples of the form (?something, pgterms:name, ?name). If something has a name specified by `pgterms:name`, that query will find it. – Joshua Taylor Jun 11 '14 at 13:57
  • If you run that latter query (`select ?name where { ?something pgterms:name ?name }`) against your data, you'll see that you get a result for *each* of the appropriate triples in the graph. – Joshua Taylor Jun 11 '14 at 14:17
  • Thank you very much for the general note and the help. your code helped me a lot. – adfsadasdsad Jun 12 '14 at 06:44