Benchmarking in BaseX: how to set up

write them up

#!/usr/bin/perl

use strict;
use warnings;

# bsd_glob treats its argument as ONE pattern, so a directory name that
# contains spaces is not split into several patterns.
use File::Glob qw(bsd_glob);

$| = 1;    # flush every print

# Usage: <script> XPATH_DIRECTORY [LIMIT]
#
# Runs every line of every *.txt file in XPATH_DIRECTORY as an XPath against
# every treebank part named in treebankparts.lst (read from the current
# working directory) and prints what QuerySonar() reports for each
# combination.

# Directory where XPaths are stored
my $directory = shift(@ARGV);
die "Usage: $0 XPATH_DIRECTORY [LIMIT]\n"
  unless defined $directory && length $directory;

# Set limit. If set to zero all results will be returned.
# A missing limit argument is treated as zero.
my $limit = shift(@ARGV);
$limit = 0 unless defined $limit;

# Create session, connect to BaseX
my $session = Session->new([INFORMATION WITHHELD]);

# List all files in directory
my @xpathfiles = bsd_glob("$directory/*.txt");
warn "no *.txt files found in $directory\n" unless @xpathfiles;

# Read lines of treebank parts into variable
my $treebank_list = 'treebankparts.lst';
open( my $tfh, '<', $treebank_list )
  or die "cannot open file $treebank_list: $!";
chomp( my @tlines = <$tfh> );
close $tfh;

# Pull the database name out of each <tb>...</tb> line once, up front,
# instead of re-running the regex for every XPath of every file.  Lines
# without a <tb> element are skipped so QuerySonar never gets an undefined
# database name.
my @treebanks;
foreach my $tline (@tlines) {
    if ( $tline =~ m{<tb>(.+)</tb>} ) {
        push @treebanks, $1;
    }
    elsif ( $tline =~ /\S/ ) {
        warn "skipping line without <tb>...</tb> in $treebank_list: $tline\n";
    }
}

# Loop through all XPaths in $directory
foreach my $xpathfile (@xpathfiles) {
    open( my $xfh, '<', $xpathfile )
      or die "cannot open file $xpathfile: $!";
    chomp( my @xlines = <$xfh> );
    close $xfh;

    print STDOUT "File = $xpathfile\n";

    # Loop through lines from XPath file (= XPath query)
    foreach my $xline (@xlines) {

        # Run this XPath against every treebank part
        foreach my $treebank (@treebanks) {
            QuerySonar( $xline, $treebank );
        }
    }
}
$session->close();

# QuerySonar( $xpath, $db )
#
# Wraps $xpath in an XQuery, runs it against the BaseX database $db through
# the file-level $session and prints the server's answer to STDOUT.
#
# The query binds every node selected by
#     db:open("<db>")/treebank/alpino_ds<xpath>
# to $results and builds a <results> element that holds the total number of
# hits followed by one <match> per hit (sentence id, node ids, distinct
# begin positions and sentence text, separated by tabs).
#
# Parameters:
#   $xpath - XPath fragment, appended verbatim after /treebank/alpino_ds
#   $db    - name of the database to open
#
# Uses the file-level $limit: a positive number restricts the listed
# matches to the first $limit hits (the <total> count is not restricted);
# zero, a negative value, a non-numeric value or undef means "no limit".
#
# No meaningful return value.  Without a database name the call warns and
# returns without contacting the server.
sub QuerySonar {
    my ( $xpath, $db ) = @_;

    if ( !defined $db || $db eq '' ) {
        warn "QuerySonar: no database name given, query skipped\n";
        return;
    }

    # Reduce the limit to a plain integer before it goes into the query
    # text, so stray characters in the command-line value never end up in
    # the XQuery.  Perl's own numeric conversion is used; the "not a
    # number" warning is silenced because such values simply mean no limit.
    my $max_hits = 0;
    if ( defined $limit ) {
        no warnings 'numeric';
        $max_hits = int($limit);
    }

    print STDOUT "Querying $db for $xpath\n";
    print STDOUT 'Limit = ' . ( defined $limit ? $limit : '' ) . "\n";

    # Inside an XQuery string literal "&" starts a reference and a double
    # quote ends the literal, so both are escaped in the database name.
    ( my $db_literal = $db ) =~ s/&/&amp;/g;
    $db_literal =~ s/"/""/g;

    my $x_resultsofxp =
        'declare variable $results := db:open("'
      . $db_literal
      . '")/treebank/alpino_ds'
      . $xpath . ';';
    my $x_open       = '<results>';
    my $x_totalcount = '<total>{count($results)}</total>';
    my $x_loopinit   = '{for $node at $limitresults in $results';

    my $x_limit;

    # Spaces are important!
    if ( $max_hits > 0 ) {
        $x_limit = ' where $limitresults <= ' . $max_hits . ' ';
    }

    # Comment needed to prevent `Incomplete FLWOR expression`
    else {
        $x_limit = '(: No limit set :)';
    }

    my $x_sentenceinfo = 'let $sentid := ($node/ancestor::alpino_ds/@id)
        let $sentence := ($node/ancestor::alpino_ds/sentence)
        let $begin := ($node//@begin)
        let $idlist := ($node//@id)
        let $beginlist := (distinct-values($begin))';

    # Separate sentence info by tab
    my $x_loopexit = 'return <match>{data($sentid)}&#09;
        {string-join($idlist, "-")}&#09;
        {string-join($beginlist, "-")}&#09;
        {data($sentence)}</match>}';
    my $x_close = '</results>';

    # Concatenate all XQuery parts
    my $x_concatquery = join '',
      $x_resultsofxp,
      $x_open,
      $x_totalcount,
      $x_loopinit,
      $x_limit,
      $x_sentenceinfo,
      $x_loopexit,
      $x_close;

    my $querysent = $session->query($x_concatquery);

    my $basexoutput = $querysent->execute();
    print $basexoutput. "\n\n";

    $querysent->close();
}

Benchmarking in BaseX: how to set up

2 Answers