0

I am scraping a page which uses socket.io to populate some select tag options. How can I wait for the socket to receive data before evaluating the page? I am using CasperJS.

the socket code (loaded by the target site):

// Handler for the 'list' socket event: appends one <option> to #myselector for
// each entry of data.match_names that is not skipped by the condition below.
// NOTE(review): the closing `});` of this handler is not shown in this excerpt.
socket.on('list', function (data) {


// Walk data.match_names; the callback receives each key as `id` and each value as `name`.
$.each(data.match_names, function (id, name) {
    // `some condition` is a placeholder from the original post, not real code.
    if (some condition) {
        /*nothing*/
    } else {
        // When `varname` is a function, the option label also includes the result of
        // get_tournament_name() called with the first four characters of the id.
        if (typeof( varname ) == 'function') {
            $('#myselector').append('<option value="' + id + '">' + name + " " + get_tournament_name(id.substr(0, 4)) + '</option>');
        } else {
            $('#myselector').append('<option value="' + id + '">' + name + '</option>');
        }
        // Count every entry for which an option was appended.
        match_count++;
    }
});

I check that the socket.io script has loaded:

// Step callback passed to waitForResource below; it only logs that the
// socket.io client script has been loaded.
var reportSocketIoLoaded = function() {
    this.echo('socket.io has been loaded.'); //is printed

    //how can I check that data has arrived from 'socket.on('list', function (data)' ?
};

casper.waitForResource("socket.io.js", reportSocketIoLoaded);

But the option tags are not on the page, presumably because I am evaluating the page too soon:

casper.then(function() {
    // Read the href of the link matched by the XPath expression on the current page.
    baseTargetUrl = this.evaluate(function() {
        var link = __utils__.getElementByXPath('//*[@id="wrapper"]/div[1]/a[2]');
        return link.href;
    });
    console.log('logging: ' + baseTargetUrl); // works

    // Open the extracted URL and run the function below inside that page.
    // baseTargetUrl is defined by now because this call is made from within the step.
    casper.thenOpenAndEvaluate(baseTargetUrl, function() {

        $(document).ready(function() { // DOM is ready

            var optionLabels = [];

            $('select#myselector option').each(function(index, option) {
                var $option = $(option);
                // Observed problem: the additional options have not been added yet at this point.
                optionLabels.push($option.text() + ' : ' + $option.val());
            });

        });
    });
});
codecowboy
  • 9,835
  • 18
  • 79
  • 134

2 Answers2

2

You could use a polling approach based on waitFor, as lud1977 describes, I guess, although I don't like it. Casper's waitFor works by polling, which is a poor fit when you are waiting for an event to happen. It would be much better to do this:

var list = null; // broader scope

casper.on('socket.list', function() {
  // do stuff with <list> here
});

// Data handed to window.callPhantom() inside the page surfaces in the Casper
// context as a 'remote.callback' event; turn the relevant ones into 'socket.list'.
casper.on('remote.callback', function(payload) {
  if (payload && payload.event === 'socket.list') {
    list = payload.data;
    this.emit('socket.list');
  }
});

casper.waitForResource("socket.io.js", function() {
  // `io` is defined by the page's socket.io client, so it only exists in the
  // page context; subscribing must therefore happen inside evaluate().
  this.evaluate(function() {
    var socket = io.connect("http://wherever-the-server-is");
    socket.on('list', function(data) {
      // Hand the payload back to the Casper context (it must be serializable).
      window.callPhantom({ event: 'socket.list', data: data });
    });
  });
});

Source: http://docs.casperjs.org/en/latest/events-filters.html

Herman
  • 1,534
  • 10
  • 16
  • Thanks but neither of these worked for me. In your second example, that won't work as socket is not defined unless you are within evaluate() in the page context. Also, you can't do require("socket.io.js"); – codecowboy Jan 10 '14 at 15:05
  • Please use my code as an example and adjust it to your needs. The fact that you can't do require(..) is no problem. Just have the Socket.io client library loaded. window.io will be defined and you can call var socket = io.connect(....). Please note: socket.io 0.9 uses different setup methods than socket.io 1.0. – Herman Jan 11 '14 at 19:01
0

I suggest you take a look at this page: how to wait for element visibility in PhantomJS

Look particularly at the waitfor function. In the situation you described, I would use something like that to check whether the page has been populated with data from the socket, meaning the socket has finished loading.

Hope it helps.

Community
  • 1
  • 1
Lud
  • 482
  • 2
  • 9