0

I have a query for ingesting some triples which I run inside spawn function.The task server uses 8 threads to execute the query. The status of the task server shows that there are a lot of tree misses and zero expanded hits. No results stored for the query. What might be the cause?

Query :

xquery version "1.0-ml";
import module namespace sem = "http://marklogic.com/semantics" at "/MarkLogic/semantics.xqy";
import module namespace entity="http://marklogic.com/entity" 
    at "/MarkLogic/entity.xqy";
let $uris := cts:uris((), (), cts:collection-query('collec_1'))
let $gene_dict := "/dict/dict_1"
let $size := xdmp:estimate(collection('collec_1'))
let $batch-size := 1000
let $num-batches :=  xs:int(math:ceil($size div $batch-size ))

for $step in (1 to fn:ceiling($size  div $batch-size )       )
let $start := ($batch-size * $step) - $batch-size + 1
let $end := $start + $batch-size - 1 

return xdmp:spawn-function(function() {
  let $socpe-uris := $uris[$start to $end]
  for $uri in $socpe-uris
  let $dict := map:map()
  let $input-node := doc($uri)/text
  let $seq := ()
  let $genes := insert-before($seq,1,entity:extract($input-node,(cts:entity-dictionary-get($G_dict)),"full")/@id) 
  let $_ := for $G in $GS
            let $n := if (map:contains($dict, $G)) then map:get($dict, $G) else 0
            return map:put($dict, $G, 1 + $n)
 
  return if (map:count($dict) gt 0)
             then 
                 for $k in map:keys($dict)
                   return sem:rdf-insert(sem:triple(sem:iri($uri),
                                       sem:iri("http://link"),
                                       sem:iri($k)),
                                       (), (), "collec_2")
           else ()
  
});
  • Lack of expanded tree hits: This looks like each URI may be hit once. If the values are unique in those docs, then everything is fetched from the compressed tree cache. Where is your data? Let the server help you. After every variable assignment in the system, go ahead and trace out the results(xdmp:log or xdmp:trace). To troubleshoot: I would step back: change spawn-function into invoke-function, change the execution to a few docs $uris[1 to 10]. Using invoke-function allows all logs in once place for troubleshooting. Otherwise, the logs are split between 8000 and task server – David Ennis -CleverLlamas.com Apr 03 '23 at 08:12

0 Answers0