I tried to use the phantomjs(v1.9.8) module (https://www.npmjs.com/package/phantom) for node.js The goal was just to display the current url and navigate by programatically clicking the Next link using JQuery. Im unsuccessful in clicking the Next link. I tried doing it the hard way via document to no avail.
Kindly point me to the right direction on how to do this.
Code:
var phantom = require('phantom');
function scrape(keyword) {
phantom.create("", {}, function(ph) {
console.log("Phantom Bridge Initiated");
ph.createPage(function(page) {
page.set("settings.javascriptEnabled", true, function() {
console.log("...enabling javascript");
});
page.set("settings.userAgent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36", function() {
console.log("...setting user agent");
});
function traverse_page(page) {
console.log('--- traverse_page ---');
page.evaluate(function() {
console.log("HOW MANY NEXT IS FOUND : " + $(".paginationNext a").length);
if ($(".paginationNext a").length > 0) {
$(".paginationNext a")[0].click(); // doesnt work
// document.getElementsByClassName('paginationNext')[0].getElementsByTagName('a')[0].click() // doesnt work too
}
return document;
}, function(result) {
setTimeout(function() {
extract_values(page);
}, 10000);
})
}
function extract_values(page) {
console.log("--- extract values ---");
page.evaluate(function() {
return { "url": $(location).attr('href') };
}, function(result) {
console.log("URL : " + result.url);
traverse_page(page);
});
}
page.open("http://search.infospace.com/search/web?q=thylacine&searchbtn=Search", function(status) {
page.set("onConsoleMessage",function(msg) {
if(msg.indexOf("Unsafe JavaScript") == -1){
console.log(msg)}});
page.includeJs("http://code.jquery.com/jquery-2.1.4.min.js", function() {
extract_values(page);
})
});
});
});
}
scrape();
Result:
Phantom Bridge Initiated
...enabling javascript
...setting user agent
--- extract values ---
URL : http://search.infospace.com/search/web?q=thylacine&searchbtn=Search
--- traverse_page ---
phantom stdout: TypeError: 'undefined' is not a function (evaluating '$(".paginationNext a")[0].click()')
phantom stdout: phantomjs://webpage.evaluate():6
phantomjs://webpage.evaluate():11
phantomjs://webpage.evaluate():11
HOW MANY NEXT IS FOUND : 1