0

I'm having some serious problems with a fairly simple CasperJS scraper that I'm trying to write. Essentially, I want to traverse some search results on a website, following each result, collecting some data, then returning to the current search page. Once this process has been completed, I want to write the results to file. I have the following code that is not working at all. Please excuse any glaringly obvious mistakes, I am quite new to javascript, being a Java, Ruby, C++ native.

// This site can also be queried via URL, I initially wrote this serializer
// to use this approach, but I ended up going with CasperJS navigation instead.
// My problems seem agnostic to whether or not I navigate using page links or URL.
function serialize(json) {
    var str = [];
    for(var prop in json) {
        if(json.hasOwnProperty(prop)) {
            str.push(encodeURIComponent(prop) + "=" + encodeURIComponent(obj[prop]));
        }
    }
    return str.join("&");
}


// Scrape Links and Names from the current page in the searh results
function getPageLinks() {
    var dancers = document.querySelectorAll('h4 > a');
    return Array.prototype.map.call(links, function(e) {
        var result = {};
        result[e.textContent] = e.getAttribute('href');
        return result;
    });
}

// For a given dancer, scrape the block of html containing the name of each donor,
// their donation amount, and any comments.
function scrapeDonorInfo() {
    var donors = document.querySelectorAll('div.msgBottomInnCont > div.meta');
    return Array.prototype.map.call(links, function(e) {
        return e.innerHtml;
    });
}

// Use Tail recursion to scrape the donors for every dancer in each page of the search results.
function scrapeAllDonors(dancers, startIndex) {
    // Inject Underscore.js for utility methods (namely _.union())
    this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js');

    // Populate the links object only after there are links to scrape
    casper.waitForSelector('h4 > a', function() {
        var links = this.evaluate(getPageLinks);
        dancers = this.evaluate(_.union(dancers, links));
    });
    this.echo('Links object populated', 'INFO'); // Log the message, 
                                                 // using this.echo() for colored tags

    // For every dancer page link on this page of search results,
    // fetch their fundraising page, scrape their donors, 
    // 
    dancers.forEach(function(element, index, array) {
        if(index >= startIndex) {
            var name = Object.keys(element)[0];
            var link = baseURL + element[name];
            casper.thenOpen(link);
            casper.waitForSelector('div.meta', function() {
                var viewMore = 'a.viewMore';
                if(casper.visible(viewMoreActivity)) {
                    casper.thenClick(viewMore);
                }

                element[name] = {"donor_info": this.evaluate(getDonorInfo)};
            });
            casper.back();
        }
    });

    var nextLink = "a#next";
    casper.waitForSelector(nextLink, function() {

        // If the next button in the results is clickable, click it.
        if (casper.visible(nextLink)) {
            casper.thenClick(nextLink);
            casper.thenEvaluate(scrapeAllDonors(dancers, dancers.length()));
        } else {
            // Otherwise, write the final results to file.
            fs.write(save, dancers, 'w');
            casper.echo("END")
        }
    });
}


// Note: This is the Phantom.js package 'fs', not the Node.js package.
var fs = require('fs');

// Create a dated file for scrape results
var fname = new Date().getTime() + '.txt';
var save = fs.pathJoin(fs.workingDirectory, 'data', fname);

// Initialize Casper.js with desired settings
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    pageSettings: {
        loadImages:  false,     
        loadPlugins: false         
    }
});

// Handler for Resource Errors
casper.on("resource.error", function(resourceError) {
    console.log('Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')');
    console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString);
});

// Handler for Page Errors
casper.on("page.error", function (msg, trace) {
    console.log( 'Error: ' + msg, 'ERROR' );
    console.log( 'Trace: ' + trace, 'TRACE' );
});

// Handler for Blocking requests made by social components (facebook in particular)
casper.on("resource.requested", function(requestData, networkRequest){
    console.log('Request (#' + requestData.id + '): ' + JSON.stringify(requestData) + "\n");
    if (requestData.url.indexOf("facebook") !== -1) {
        networkRequest.abort();
    }
});

// BaseURL for the site, convenient for scrapeAllDonors
var baseURL = 'https://fundraise.nudm.org/';

casper.start('https://fundraise.nudm.org/search/fundraisers?page=1');

casper.then(scrapeAllDonors([], 0));

// Run everything in the stack, then notify and exit
casper.run(function() {
    this.echo("DONE", 'INFO');
    this.exit();
});

To make the problem worse, Casper/Phantom refuses to print any of my log messages and I can't figure out why. When I run without debug, I get:

casperjs --ssl-protocol=tlsv1 Crawler.js
[info] [phantom] Starting...
Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match.

With debug enabled, I get:

 casperjs --ssl-protocol=tlsv1 -debug=true Crawler.js
Unable to open file: -debug=true
Unable to load script -debug=true; check file syntax
dhcp-199-74-85-154:NUDM Expose williambyrne$ casperjs --ssl-protocol=tlsv1 --debug=true Crawler.js
2016-03-06T14:22:31 [DEBUG] CookieJar - Created but will not store cookies (use option '--cookies-file=<filename>' to enable persisten cookie storage) 
2016-03-06T14:22:31 [DEBUG] Phantom - execute: Configuration 
2016-03-06T14:22:31 [DEBUG]      0 objectName : "" 
2016-03-06T14:22:31 [DEBUG]      1 cookiesFile : "" 
2016-03-06T14:22:31 [DEBUG]      2 diskCacheEnabled : "false" 
2016-03-06T14:22:31 [DEBUG]      3 maxDiskCacheSize : "-1" 
2016-03-06T14:22:31 [DEBUG]      4 ignoreSslErrors : "false" 
2016-03-06T14:22:31 [DEBUG]      5 localToRemoteUrlAccessEnabled : "false" 
2016-03-06T14:22:31 [DEBUG]      6 outputEncoding : "UTF-8" 
2016-03-06T14:22:31 [DEBUG]      7 proxyType : "http" 
2016-03-06T14:22:31 [DEBUG]      8 proxy : ":1080" 
2016-03-06T14:22:31 [DEBUG]      9 proxyAuth : ":" 
2016-03-06T14:22:31 [DEBUG]      10 scriptEncoding : "UTF-8" 
2016-03-06T14:22:31 [DEBUG]      11 webSecurityEnabled : "true" 
2016-03-06T14:22:31 [DEBUG]      12 offlineStoragePath : "" 
2016-03-06T14:22:31 [DEBUG]      13 offlineStorageDefaultQuota : "-1" 
2016-03-06T14:22:31 [DEBUG]      14 printDebugMessages : "true" 
2016-03-06T14:22:31 [DEBUG]      15 javascriptCanOpenWindows : "true" 
2016-03-06T14:22:31 [DEBUG]      16 javascriptCanCloseWindows : "true" 
2016-03-06T14:22:31 [DEBUG]      17 sslProtocol : "tlsv1" 
2016-03-06T14:22:31 [DEBUG]      18 sslCertificatesPath : "" 
2016-03-06T14:22:31 [DEBUG]      19 webdriver : ":" 
2016-03-06T14:22:31 [DEBUG]      20 webdriverLogFile : "" 
2016-03-06T14:22:31 [DEBUG]      21 webdriverLogLevel : "INFO" 
2016-03-06T14:22:31 [DEBUG]      22 webdriverSeleniumGridHub : "" 
2016-03-06T14:22:31 [DEBUG] Phantom - execute: Script & Arguments 
2016-03-06T14:22:31 [DEBUG]      script: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js" 
2016-03-06T14:22:31 [DEBUG]      0 arg: "--casper-path=/usr/local/Cellar/casperjs/1.1-beta4/libexec" 
2016-03-06T14:22:31 [DEBUG]      1 arg: "--cli" 
2016-03-06T14:22:31 [DEBUG]      2 arg: "Crawler.js" 
2016-03-06T14:22:31 [DEBUG] Phantom - execute: Starting normal mode 
2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/package.json" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/coffee-script.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./lexer.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././rewriter.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././helpers.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./parser.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./helpers.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./nodes.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././scope.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./././helpers.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/././lexer.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/../coffee-script/./lib/coffee-script/./././rewriter.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/package.json" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/cli.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/utils.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] Phantom - injectJs: "Crawler.js" 
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/casper.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/colorizer.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/events.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/http.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/mouse.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/pagestack.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/querystring.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: "/usr/local/Cellar/casperjs/1.1-beta4/libexec/modules/tester.js" QMap(("mode", QVariant(QString, "r") ) )  
[info] [phantom] Starting...
2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getJsConfirmCallback 
2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getGenericCallback 
2016-03-06T14:22:31 [DEBUG] WebpageCallbacks - getJsConfirmCallback 
2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 10 
2016-03-06T14:22:31 [DEBUG] WebPage - setupFrame "" 
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/_coffee-script.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r") ) )  
2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 100 
Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match.

2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 10 
2016-03-06T14:22:31 [DEBUG] WebPage - updateLoadingProgress: 100 

Any ideas?

Update (after making the first of the suggested changes)

williambyrne$ casperjs --ssl-protocol=tlsv1  Crawler.js
[info] [phantom] Starting...
[info] [phantom] Running suite: 3 steps
[debug] [phantom] opening url: https://fundraise.nudm.org/search/fundraisers?page=1, HTTP GET
[debug] [phantom] Navigation requested: url=https://fundraise.nudm.org/search/fundraisers?page=1, type=Other, willNavigate=true, isMainFrame=true
Request (#1): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}],"id":1,"method":"GET","time":"2016-03-06T21:03:49.874Z","url":"https://fundraise.nudm.org/search/fundraisers?page=1"}

[debug] [phantom] url changed to "https://fundraise.nudm.org/search/fundraisers?page=1"
Request (#2): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":2,"method":"GET","time":"2016-03-06T21:03:51.112Z","url":"https://fundraise.nudm.org/css/sc_global.css?cuiv=1456860159443"}

Request (#3): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":3,"method":"GET","time":"2016-03-06T21:03:51.113Z","url":"https://fundraise.nudm.org/stylesheets/css/charity/search.css?cuiv=1456860159443"}

Request (#4): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":4,"method":"GET","time":"2016-03-06T21:03:51.113Z","url":"https://fundraise.nudm.org/css/white_label_header_v3.4.3.1.css?cuiv=1456860159443"}

Request (#5): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"text/css,*/*;q=0.1"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":5,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://fundraise.nudm.org/css/white_label_header_responsive.css?cuiv=1456860159443"}

Request (#6): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":6,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js"}

Request (#7): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":7,"method":"GET","time":"2016-03-06T21:03:51.114Z","url":"https://fundraise.nudm.org/js/front_scripts.js?cuiv=1456860159443"}

Request (#8): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":8,"method":"GET","time":"2016-03-06T21:03:51.115Z","url":"https://fundraise.nudm.org/js/mobile_share.js?cuiv=1456860159443"}

Request (#9): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":9,"method":"GET","time":"2016-03-06T21:03:51.115Z","url":"https://fundraise.nudm.org/js/search.js?cuiv=1456860159443"}

Request (#10): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":10,"method":"GET","time":"2016-03-06T21:03:51.116Z","url":"https://fundraise.nudm.org/js/mobile.js?cuiv=1456860159443"}

Request (#11): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":11,"method":"GET","time":"2016-03-06T21:03:51.304Z","url":"https://ssl.google-analytics.com/ga.js"}

Request (#12): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":12,"method":"GET","time":"2016-03-06T21:03:51.304Z","url":"https://www.google-analytics.com/analytics.js"}

Request (#13): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":13,"method":"GET","time":"2016-03-06T21:03:51.309Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Reg-webfont.woff"}

Request (#14): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":14,"method":"GET","time":"2016-03-06T21:03:51.313Z","url":"https://connect.facebook.com/en_US/sdk.js"}

Request (#15): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":15,"method":"GET","time":"2016-03-06T21:03:51.314Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Sbold-webfont.woff"}

Request (#16): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":16,"method":"GET","time":"2016-03-06T21:03:51.315Z","url":"https://fundraise.nudm.org/css/fonts/pictos/pictos-webfont.woff"}

Request (#17): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":17,"method":"GET","time":"2016-03-06T21:03:51.315Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Bold-webfont.woff"}

Request (#18): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":18,"method":"GET","time":"2016-03-06T21:03:51.316Z","url":"https://fundraise.nudm.org/css/fonts/proximanova/ProximaNova-Thin-webfont.woff"}

Request (#19): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"},{"name":"Accept","value":"*/*"}],"id":19,"method":"GET","time":"2016-03-06T21:03:51.317Z","url":"https://fundraise.nudm.org/css/fonts/entypo/entypo.woff"}

Unable to load resource (#14URL:)
Error code: 301. Description: Protocol "" is unknown
Request (#20): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":20,"method":"GET","time":"2016-03-06T21:03:51.796Z","url":"https://js-agent.newrelic.com/nr-885.min.js"}

Request (#21): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) CasperJS/1.1.0-beta4+PhantomJS/1.9.8 Safari/534.34"},{"name":"Accept","value":"*/*"},{"name":"Referer","value":"https://fundraise.nudm.org/search/fundraisers?page=1"}],"id":21,"method":"GET","time":"2016-03-06T21:03:53.756Z","url":"https://bam.nr-data.net/1/67fe2a1b26?a=10291124&v=885.a559836&to=ZV0HYUJUCEYEU0QLC1wXJFZEXAlbSlRVBAVHVBEaQ1AHRwZYHwQRXFwXVFlGA0cW&rst=2645&ap=775&fe=686&dc=204&f=%5B%5D&at=SRoEFwpOG0g%3D&jsonp=NREUM.setToken"}

[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] start page is loaded
[info] [phantom] Step anonymous 3/3 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200)
Links object populated
[info] [phantom] Step anonymous 3/3: done in 3944ms.
[info] [phantom] Step _step 4/5 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200)
[info] [phantom] Step _step 4/5: done in 3965ms.
[info] [phantom] waitFor() finished in 40ms.
[info] [phantom] Step anonymous 5/6 https://fundraise.nudm.org/search/fundraisers?page=1 (HTTP 200)
Error: ReferenceError: Can't find variable: links
Trace: [object Object],[object Object],[object Object]
Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file:///usr/local/Cellar/casperjs/1.1-beta4/libexec/bin/bootstrap.js. Domains, protocols and ports must match.

It seems that there is some problem with the scoping of the 'links' array.

Update 2: (Changes to scrapeAllDonors)

// Use Tail recursion to scrape the donors for every dancer in each page of the search results.
function scrapeAllDonors(dancers, startIndex) {
    // Inject Underscore.js for utility methods (namely _.union())
    this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js');

    // Populate the links object only after there are links to scrape
    casper.waitForSelector('h4 > a', function() {
        var links = this.evaluate(getPageLinks);
        dancers = this.evaluate(_.union(dancers, links));

        // For every dancer page link on this page of search results,
        // fetch their fundraising page, scrape their donors, 
        // 
        dancers.forEach(function(element, index, array) {
            if(index >= startIndex) {
                var name = Object.keys(element)[0];
                var link = baseURL + element[name];
                casper.thenOpen(link);
                casper.waitForSelector('div.meta', function(name) {
                    var viewMore = 'a.viewMore';
                    if(casper.visible(viewMoreActivity)) {
                        casper.thenClick(viewMore);
                    }

                    element[name] = {"donor_info": this.evaluate(getDonorInfo)};
                }, name);
                casper.back();
            }
        });

        // If the next button in the results is clickable, click it.
        var nextLink = "a#next";
        if (casper.visible(nextLink)) {
            casper.thenClick(nextLink);
            casper.then(function() {
                scrapeAllDonors.call(this, dancers, dancers.length());
            });
        } else {
            // Otherwise, write the final results to file.
            fs.write(save, dancers, 'w');
            casper.echo("END")
        }
    });
    this.echo('Donor Information Scraped', 'INFO'); // Log the message, 
                                                 // using this.echo() for colored tags
}
Will Byrne
  • 681
  • 7
  • 15

1 Answers1

1

You've made the error of calling scrapeAllDonors immediately instead of passing it in for execution at a later time, here:

casper.thenEvaluate(scrapeAllDonors(dancers, dancers.length()));

and here:

casper.then(scrapeAllDonors([], 0));

This means that it executes before even the first page is loaded and therefore tries to operate on about:blank. If you want to call it like that, you need to refactor scrapeAllDonors, so that it returns a step function:

function scrapeAllDonors(dancers, startIndex) {
    return function(){
        // Inject Underscore.js for utility methods (namely _.union())
        this.page.injectJs('https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js');

        // ...

        var nextLink = "a#next";
        casper.waitForSelector(nextLink, function() {
            // ...
        });
    };
}

If you don't want to change scrapeAllDonors would be to refactor the calls to it by replacing

casper.then(scrapeAllDonors(...));

with

casper.then(function(){
    scrapeAllDonors.call(this, ...)
});

My answer on What must be wrapped in then() statements in CasperJS? How to determine execution order of sync/async functions? might be helpful for understanding the intricacies of the asynchronous execution in CasperJS.

Community
  • 1
  • 1
Artjom B.
  • 61,146
  • 24
  • 125
  • 222
  • So if I'm understanding this correctly the problem is that scrapeAllDonors is called before the page I requested in casper.start is finished loading? So would a viable solution be to throw the `casper.then(scrapeAllDonors([], 0));` in a `casper.waitForUrl('https://fundraise.nudm.org/search/fundraisers?page=1', function() {});` ? – Will Byrne Mar 06 '16 at 20:44
  • PS: I actually read that answer while I was working on this. Interesting, but I must not have read closely enough. – Will Byrne Mar 06 '16 at 20:44
  • Got it, thanks so much. I'll give both of those a try. – Will Byrne Mar 06 '16 at 20:55
  • Good point, incidentally, its not quite working. Check the update – Will Byrne Mar 06 '16 at 21:03
  • Looking further on your code, it should be noted that `wait*` is asynchronous, so `dancers.forEach` will never iterate or will result in a TypeError. You need to move that block to the end of the `casper.waitForSelector` callback above it. Besides that, I haven't checked whether the logic is ok ;) – Artjom B. Mar 06 '16 at 21:03
  • Fixed that too and I still get the exact same console output that I added – Will Byrne Mar 06 '16 at 21:07
  • *Error: ReferenceError: Can't find variable: links* says it all. It either has to be `dancers` or `donors`. – Artjom B. Mar 06 '16 at 21:15
  • Hmm, I was especially careful about the scope and made some more changes, but still the exact same error. Check the update for my changes to `scrapeAllDonors`. – Will Byrne Mar 06 '16 at 21:24
  • Let me know if you get a chance to take a look @artjom-b and thanks again for the help – Will Byrne Mar 06 '16 at 23:10
  • There is no `_` in the outer context. You've inject underscore into the page context, but those two are separated and sandboxed. I'm not sure, why you need it at all, since Array does provide a concat function: `dancers = dancers.concat(links);`, provided those are actual arrays and not just array-like (duck typing is fun) – Artjom B. Mar 06 '16 at 23:25
  • Wow, Silly me, I'll try that. – Will Byrne Mar 06 '16 at 23:26
  • As I said in an earlier comment `var dancers = document.querySelectorAll('h4 > a'); return Array.prototype.map.call(links, function(e) {` should be `var dancers = document.querySelectorAll('h4 > a'); return Array.prototype.map.call(dancers, function(e) {`. There is another reference to `links` that cannot be fulfilled and that should be replaced with `donors` – Artjom B. Mar 06 '16 at 23:33
  • Wow, completely missed that, sorry. Thanks again. – Will Byrne Mar 06 '16 at 23:34