I have this Perl script that dies with an "Out of memory" error after a few minutes of running. It walks a paginated dataset listing, opens each matching entry, and downloads any linked PDFs. I can't see any circular references and I can't work out why it is happening.
use strict;
use warnings;
use feature 'say';
use utf8;
use WWW::Mechanize;
use HTML::TreeBuilder::XPath;

my $url = "some url";
my $mech = WWW::Mechanize->new;
$mech->get($url);

# Parse the first listing page into an XPath-capable tree.
my $html = HTML::TreeBuilder::XPath->new_from_content($mech->content);
my $html2;
my $nextpage;

do {
    # Each dataset entry on the current listing page.
    for my $item ($html->findnodes('//li[@class="dataset-item"]'))
    {
        my $title = $item->findvalue('normalize-space(.//a[2])');
        next unless $title =~ /environmental impact statement/i;

        # Open the entry's detail page.
        my $link = $item->findvalue('.//a[2]/@href');
        $mech->get($link);
        $html2 = HTML::TreeBuilder::XPath->new_from_content($mech->content);

        my @pdflinks = $html2->findvalues('//a[@title="Go to external URL"]/@href');
        my $date = $html2->findvalue('//tr[th="Date Created"]/td');

        # Download every PDF linked from the detail page.
        for my $pdflink (@pdflinks)
        {
            next unless $pdflink =~ /\.pdf$/;
            $mech->get($pdflink);
            my $filename = $mech->response->filename;
            $mech->save_content($filename);
            say "Title: $title\nDate: $date\nFilename: $filename\n";
        }
    }
    if ($nextpage = $html->findvalue('//ul[@class="pagination"]/li/a[.="»"]/@href'))
    {
        say "Next Page: $nextpage\n";
        $mech->get("some site" . $nextpage);
        $html = HTML::TreeBuilder::XPath->new_from_content($mech->content);
    }
} while ($nextpage);
say "Completed.";