2

This is what I currently have:

// First CasperJS instance.
// NOTE(review): `casper` is declared a second time further down (with
// clientScripts), so this instance is replaced before it is ever used.
var casper = require('casper').create();

// Output path: <working directory>/CARD_DATA/allChampionDecks.txt
var fs = require('fs');
var folderName = 'CARD_DATA';
var fileName = 'allChampionDecks.txt';
var save = fs.pathJoin(fs.workingDirectory, folderName, fileName);

// Re-create the casper instance with jquery.min.js listed as a client script,
// so that getLinkDeckData can use `$` when it is evaluated.
var casper = require('casper').create({
    clientScripts: ['jquery.min.js']
});

// NOTE(review): URL is not defined anywhere in this snippet — presumably it is
// set elsewhere; confirm. The start callback is intentionally empty.
casper.start(URL, function() {

});

// Pages to scrape, a shared counter used to index into `links`, and the
// string that accumulates the scraped output of every page.
var links = ["http://magic.wizards.com/en/events/coverage/mtgochamp14","http://magic.wizards.com/node/335986","http://magic.wizards.com/en/events/coverage/2014WC"];
var i = -1;
var linkData = '';

// Iterate over the links, queueing one thenOpen step per link.
casper.then(function() {
    this.each(links, function() { 
        i++;
        this.thenOpen((links[i]), function() {
            // Append whatever getLinkDeckData returns for the page just opened.
            linkData += this.evaluate(getLinkDeckData);
        });
    });

    // NOTE(review): this write is not wrapped in a step of its own, so it is
    // not queued behind the thenOpen steps above — this is the empty-linkData
    // problem the question describes.
    fs.write(save, linkData + '\n', 'w');
});

// scrape
/**
 * Scrapes every deck on the current page into a JSON-formatted string.
 *
 * Intended to be passed to casper.evaluate(), so it relies on a page-global
 * jQuery `$` and on nothing else from the surrounding script.
 *
 * All scraped text is escaped with JSON.stringify, so quotes, backslashes and
 * line breaks in event, deck or card names no longer corrupt the output. For
 * text without such characters the output is byte-identical to the previous
 * hand-concatenated format.
 *
 * NOTE(review): when a page holds more than one deck, all decks are still
 * written into a single object with repeated "event"/"deckName"/"deck"/
 * "sideboard" keys (the historical format). Consumers that need every deck
 * should confirm how they parse this before the format is changed.
 *
 * @returns {string} '{}' when the page has no decks, otherwise
 *     '{"event": ...,"deckName": ...,"deck": [...],"sideboard": [...]}'.
 */
function getLinkDeckData() {
    var meta = $('.deck-meta h4');
    var event = $('.deck-meta h5');
    var allDecks = $('.toggle-text .deck-list-text');

    // Serialises the card rows found under one container of a deck as a
    // comma-separated run of {"quantity":..., "name":...} objects.
    function cardsToJson(deck, containerSelector) {
        var cardCount = $(deck).find(containerSelector + ' .row .card-count');
        var cardName = $(deck).find(containerSelector + ' .row .card-name');
        var cards = [];

        for (var j = 0; j < cardCount.length; j++) {
            cards.push('{"quantity":' + JSON.stringify($(cardCount[j]).text())
                + ', "name":' + JSON.stringify($(cardName[j]).text()) + '}');
        }

        return cards.join(',');
    }

    // Previously an empty page produced the unterminated string '{'.
    if (meta.length === 0) {
        return '{}';
    }

    var json = '{';

    for (var i = 0; i < meta.length; i++) {
        json += '"event": ' + JSON.stringify($(event[i]).text().trim()) + ','
            + '"deckName": ' + JSON.stringify($(meta[i]).text()) + ','
            + '"deck": [' + cardsToJson(allDecks[i], '.sorted-by-overview-container') + '],'
            + '"sideboard": [' + cardsToJson(allDecks[i], '.sorted-by-sideboard-container') + ']'
            // Separate consecutive decks with a comma; close the object after the last one.
            + (i < meta.length - 1 ? ',' : '}');
    }

    return json;
}

// Start executing the steps queued above.
casper.run();

I'm trying to iterate through some links to scrape some data. I'm not getting any errors but linkData is empty and nothing is written to the file.

For a single page I used the following and it works fine:

// Single-page variant: scrape the start page and write the result straight to
// the output file from inside the start callback.
casper.start(URL, function scrapeStartPage() {
    fs.write(save, this.evaluate(getLinkDeckData) + '\n', 'w');
});
Artjom B.
  • 61,146
  • 24
  • 125
  • 222
Howard
  • 3,648
  • 13
  • 58
  • 86

1 Answer

2

All then* (and wait*) functions are asynchronous step functions. When you make a loop and inside the loop call casper.thenOpen() then you schedule an opening step with an accompanying then callback as a separate step.

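The problem is that fs.write() is called directly inside the outer step rather than in a step of its own: it runs right after the loop has scheduled the thenOpen() steps, before any of them has executed, so linkData is still empty at that point. Simply wrap the write in casper.then() so that it is scheduled after the page steps, and it will work.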

Fixed snippet:

// Queue one "open + scrape" step per link, followed by a final step that
// writes the accumulated data once all of the page steps have run.
casper.then(function() {
    // Array.prototype.forEach takes the callback as its first argument; the
    // second argument (thisArg) keeps `this` bound to the casper instance
    // inside the callback so that this.thenOpen() resolves.
    links.forEach(function(link) {
        this.thenOpen(link, function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    }, this);

    // Scheduled after the thenOpen steps above, so linkData is complete
    // by the time it is written.
    this.then(function(){
        fs.write(save, linkData + '\n', 'w');
    });
});

Instead of using CasperJS' each, you should use Array.prototype.forEach. That way, you don't need a global counter variable.

Artjom B.
  • 61,146
  • 24
  • 125
  • 222
  • I have an issue with looping: `then()` and `thenEvaluate()` don't work inside the loop: https://stackoverflow.com/q/44176889/190929 – DummyBeginner May 25 '17 at 14:47