I want to crawl Google PlayStore ranking pages such as "https://play.google.com/store/apps/category/EDUCATION/collection/topselling_paid"
When I view the page with a browser, it firstly shows 60 apps and shows more apps up to 540 by mouse scrolling and clicking a "show more" button.
I think the page is fully rendered when events such as "mouse scroll", "click a button" are created.
Problem is that I don't know how to generate these events without browsers, therefore I could only scrape the pages which are not fully rendered and contain only 60 apps.
I tried a bellow code with PhantomJS, but did not work at all.
var page = require('webpage').create(),
system = require('system'),
url;
url = system.args[1];
page.onConsoleMessage = function(msg) {
console.log(msg);
};
page.open(url, function() {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
page.evaluate(function() {
//Shows ranking up to 60th.
console.log($("a.title").text());
//Tried scroll mouse. However, cannot render the page.
for(i=0; i<150; i++){
console.log(document.body.scrollTop)
window.scrollTo(0, document.body.scrollHeight);
console.log(document.body.scrollTop)
window.scrollTo(0, 0);
}
//Expect to show ranking up to 540th.
console.log($("a.title").text());
});
phantom.exit()
});
});
How can I crawl the fully rendered pages?