1

I would like to take screenshots of many pages using PhantomJS.

The problem is how to handle more than 1000 pages, because at the moment the following code crashes PhantomJS:

This is an example of aobj:

// Sample capture points, kept as JSON text and parsed with JSON.parse in the code below.
// In the snippets that follow, `jb` and `kb` are sent as the `location` pair and `ff` as the
// `heading` of the Street View request (presumably latitude, longitude and compass heading).
var aobj = '[{"kb":21.047829999999976,"jb":52.174250000000015,"ff":110.16456426650427},{"kb":21.047997078651633,"jb":52.17421235955058,"ff":110.16456426650427},{"kb":21.048164157303404,"jb":52.17417471910114,"ff":110.16456426650427},{"kb":21.04833123595506,"jb":52.1741370786517,"ff":110.16456426650427},{"kb":21.048498314606718,"jb":52.174099438202255,"ff":110.16456426650427},{"kb":21.04866539325849,"jb":52.17406179775282,"ff":110.16456426650427},{"kb":21.048832471910146,"jb":52.17402415730338,"ff":110.16456426650427},{"kb":21.048999550561803,"jb":52.173986516853944,"ff":110.16456426650427}]'

... and more

Rest of the code:

// Replace the JSON text with the parsed array so its entries can be indexed below.
aobj = JSON.parse(aobj);

function tes_par(nrr,jbb,kkb,hhf)
{
var page = require('webpage').create();
page.viewportSize = { width: 600, height: 480 };
console.log(nrr);
page.open('http://maps.googleapis.com/maps/api/streetview?size=640x480&location='+jbb+','+kkb+'&pitch=-0.760&sensor=false&heading='+hhf, function () 
{
    page.render(nrr + '.png');      
});
}

// Start a capture for every entry of aobj. Nothing here waits for one
// capture to finish before the next one is started.
aobj.forEach(function (point, index)
{
    tes_par(index, point.jb, point.kb, point.ff);
});

I assume this is because JS can run only one piece of code at a time, so the loop consumes all resources, but I have no idea how to wait for page.open and page.render to finish when there are more than 1000 calls of these functions.

I also tried to use setTimeout, but again without success:

// Replace the JSON text with the parsed array so its entries can be indexed below.
aobj = JSON.parse(aobj);

function tes_par(nrr,jbb,kkb,hhf)
{
var page = require('webpage').create();
page.viewportSize = { width: 600, height: 480 };
console.log(nrr);
page.open('http://maps.googleapis.com/maps/api/streetview?size=640x480&location='+jbb+','+kkb+'&pitch=-0.760&sensor=false&heading='+hhf, function () 
{
    page.render(nrr + '.png');      
});
}


/**
 * Start captures for one batch of at most 500 entries of `aobj`.
 *
 * @param ii Index of the first entry of the batch. The batch stops before
 *     index `ii + 500`, or at the end of `aobj` if that comes first.
 */
function aa_bb(ii)
{
    var stop = (ii + 500 < aobj.length) ? ii + 500 : aobj.length;
    var idx = ii;

    while (idx < stop)
    {
        tes_par(idx, aobj[idx].jb, aobj[idx].kb, aobj[idx].ff);
        idx++;
    }
}

// Run the batches of 500 two minutes apart. The first batch starts at
// index 0 so that aobj[0] is not skipped, and further batches keep being
// scheduled until the whole of aobj has been covered (not just the first
// two batches).
(function run_batches(start)
{
    if (start >= aobj.length) { return; }
    aa_bb(start);
    setTimeout(function(){ run_batches(start + 500); }, 120000);
})(0);
Swapnil
  • 301
  • 1
  • 10
Tom
  • 67
  • 1
  • 3
  • 8
  • Sorry I haven't worked in phantom js. You are running this script in node right? can you use some kind of queue so that you can pass the request one at a time? – karthick Jun 26 '13 at 10:31
  • Yes, I have been thinking about this. Unfortunately, my implementations of the queue don't work:(. – Tom Jun 26 '13 at 11:30

2 Answers

2

Try calling page.close() after each render. The docs mention reusing the same object without closing can prevent garbage collection.

Artjom B.
  • 61,146
  • 24
  • 125
  • 222
Simon
  • 37,815
  • 2
  • 34
  • 27
  • Same results - before page.close() is executed phantomjs crashes because of too many page.open() calls. – Tom Jun 26 '13 at 11:17
2

See Using Multiple page.open in Single Script. The basic idea is that you open the next page at the end of the function passed to the previous page open. Otherwise, you are opening a bunch of pages at once, and mayhem ensues.

// Replace the JSON text with the parsed array of capture points.
aobj = JSON.parse(aobj);
// A single page object is created once here and shared by every capture.
var page = require('webpage').create();
page.viewportSize = { width: 600, height: 480 };

/**
 * Load one Street View image in the shared `page`, save it as
 * "<nrr>.png", then schedule the capture of the next entry.
 *
 * The next capture is only started from inside the page.open() callback,
 * so at most one load is in flight at any time.
 *
 * @param nrr Index of the entry; logged and used as the PNG file name.
 * @param jbb First component of the `location` query parameter.
 * @param kkb Second component of the `location` query parameter.
 * @param hhf Value of the `heading` query parameter.
 */
function tes_par(nrr,jbb,kkb,hhf)
{
    console.log(nrr);
    page.open(
        'http://maps.googleapis.com/maps/api/streetview?size=640x480&location='+jbb+','+kkb+'&pitch=-0.760&sensor=false&heading='+hhf,
        function (status)
        {
            // Render only when the load succeeded; a failed load would
            // otherwise be saved as if it were a valid screenshot.
            if (status === 'success')
            {
                page.render(nrr + '.png');
            }
            else
            {
                console.log('Failed to load image ' + nrr + ' (status: ' + status + ')');
            }
            // Move on to the next entry either way, after a short pause.
            setTimeout(function(){next_thingie(nrr+1);},100);
        }
    );
}

/**
 * Start the capture of entry `i` of `aobj`, or end the script once every
 * entry has been handled.
 *
 * @param i Index into `aobj` of the entry to capture next.
 */
function next_thingie(i)
{
    if(i>=aobj.length)
    {
        // All entries are done: PhantomJS keeps running until it is told
        // to exit, so terminate the script explicitly.
        phantom.exit();
        return;
    }
    tes_par(i,aobj[i].jb,aobj[i].kb,aobj[i].ff);
}

// Start the chain with the first entry; each finished capture schedules the next one.
next_thingie(0);

Update

The code above has been changed from the earlier version, after I reviewed my own code (which works) and found I had not applied it correctly to this problem. Changes include:

  1. Create page only once.

  2. Make recursive call for next page within page.open function.

  3. Apply a little timeout to help PhantomJS catch its breath.

This approach works for me in capturing about 300 screenshots from about 80 files.

Community
  • 1
  • 1
  • It doesn't work;/. console.log(nrr) prints only "0" and then crashes. – Tom Jul 01 '13 at 10:30
  • `i` was being incremented in the wrong place and never reached. See my edit. –  Jul 02 '13 at 12:48
  • Something around i=1200 I get _[CRITICAL] QThread::start: Failed to create thread_ and phantomjs crashes. Moreover, it iterates over `i` so fast that phantomjs doesn't have time to save png file (for sure). – Tom Jul 03 '13 at 12:52