I got some trouble using the waitFor-Method within PhantomJS.
This is what I want to do:
- Load multiple webpages by generated urls
- Use jQuery to parse some links from that pages
- Store each parsed link in the same array (In this example I'll just log them)
I'm Using the waitFor() -Method, so I can wait until a page has been evaluated. As I understand, this method will make the programm prevent to continue, until the function which I pass as a parameter has returned anything.
My Problem: Actually the programm will not continue to run after it executes the waitFor-Method. It's just stuck. There is no error whatsoever. The function I passed as a parameter will not be executed...at least there is no logging in the console.
When I remove the waitFor-Methot it will execute the code properly, however I cannot execute the handleSeriesPageListPage()-Method multiple times. I'm really not too much into js and callbacks or asynchronous method handling. I guess I did some heavy mistakes and some javascript expert will be able to help me quickly :).
"use strict";
var page = require('webpage').create();
page.onConsoleMessage = function (msg) {
console.log(msg);
};
var seriesPageBaseUrl = "https://www.example.com?pageid=";
var simpleBaseUrl = "https://www.example.com/";
var seriesPageIds = [0xx, 1xx];
var allSeriesUrls = [];
function handleSeriesPageListPage(url) {
console.log("Open url: " + url);
page.open(url, function (status) {
console.log("status: " + status);
if (status === "success") {
waitFor(
function () {
return page.includeJs("https://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
console.log("Included JS");
return page.evaluate(function () {
console.log("evaluate result...");
$('.list_item').each(function () {
var seriesLink = jQuery(this).find("a").first().attr("href");
var seriesUrl = simpleBaseUrl + seriesLink;
console.log(seriesUrl);
return true;
});
});
});
}
);
} else {
phantom.exit(1);
}
});
}
function nextSeriesListPage() {
var seriesPageId = seriesPageIds.shift();
if (typeof seriesPageId === "undefined") {
console.log(allSeriesUrls);
phantom.exit(0);
}
var targetURL = seriesPageBaseUrl + seriesPageId;
handleSeriesPageListPage(targetURL);
}
nextSeriesListPage();