1

I'm trying to scrape a webpage that is populated via JavaScript, but cheerio keeps returning an empty string ("") when it scrapes a particular element.

My node.js file:

// johnny-five supplies the Board and Leds classes used below.
var five = require("johnny-five");
// Single board instance; its "ready" event is awaited before any LED is created.
var board = new five.Board();
// request performs the HTTP GET; cheerio parses the returned HTML body.
var request = require('request');
var cheerio = require('cheerio');

// LED collection shared by every poll; created once, after the board is ready.
var scoreLeds = null;

/**
 * Runs `action(leds)` as soon as the board is ready, creating the LED
 * collection on first use.
 *
 * The board's "ready" event fires only once, so a listener attached after
 * that point would never run. This helper therefore checks `board.isReady`
 * and only subscribes (with `once`) while the board is still starting up.
 *
 * @param {function(Object): void} action - Receives the shared LED collection.
 */
function withScoreLeds(action) {
  if (scoreLeds) {
    action(scoreLeds);
    return;
  }

  var start = function () {
    // Several polls may have queued up before "ready"; only the first creates the LEDs.
    if (!scoreLeds) {
      scoreLeds = new five.Leds([3, 5, 6]);
    }
    action(scoreLeds);
  };

  if (board.isReady) {
    start();
  } else {
    board.once("ready", start);
  }
}

/**
 * Drives the LEDs for a scraped answer: "NO" pulses LED 0 and turns LED 2
 * off, "YES" does the opposite. Any other value leaves the LEDs untouched.
 *
 * @param {Object} leds - LED collection indexed like an array.
 * @param {string} a - The scraped answer text.
 */
function showAnswer(leds, a) {
  if (a === "NO") {
    leds[0].pulse();
    leds[2].off();
    console.log("GREEN");
    console.log(a);
  } else if (a === "YES") {
    leds[0].off();
    leds[2].pulse();
    console.log("RED");
    console.log(a);
  }
}

/**
 * Fetches the score page, reads the `.answer` element and updates the LEDs.
 *
 * `callback` is always invoked once the request has finished, whether it
 * succeeded or not, so the caller can schedule the next poll.
 *
 * @param {function(): void} callback - Called after the response was handled.
 */
function scoreRequest(callback) {
  request('http://selfiesoldier.co/jsmidterm/newindex.html', function (error, response, body) {
    if (error || !response || response.statusCode !== 200) {
      // Report the failure instead of silently skipping this poll.
      console.error("score request failed:", error || (response && response.statusCode));
    } else {
      var $ = cheerio.load(body);
      var html = $(".answer").html();
      // .html() yields null when nothing matches; normalise and drop stray whitespace.
      var a = typeof html === "string" ? html.trim() : "";

      console.log(body);

      if (a === "") {
        // request only downloads the static HTML; content injected by the
        // page's own scripts is not present in `body`.
        console.warn("'.answer' is empty in the fetched HTML; it may be filled in by client-side JavaScript");
      } else {
        withScoreLeds(function (leds) {
          showAnswer(leds, a);
        });
      }
    }

    if (typeof callback === "function") {
      callback();
    }
  });
}

/**
 * Arms a ten-second timer that starts the next score request, handing
 * itself over as the callback so the polling keeps repeating.
 */
function wait10sec() {
  var delayMs = 10000;

  var pollAgain = function () {
    scoreRequest(wait10sec);
    console.log("its been 10 sec");
  };

  setTimeout(pollAgain, delayMs);
}


// Start the first poll; wait10sec is handed over as the callback that runs once the request has completed.
scoreRequest(wait10sec);

The h1.answer HTML tag is populated by JavaScript, which sets its content (YES or NO) depending on some JSON data.

When I run the script however, I get a blank response instead of "NO" or "YES".

loldi
  • 41
  • 9
  • 1
    When you make a request for that page `(.../newindex.html)` with the `request` client, it gets just the HTML page and doesn't follow up with requests for each CSS and JS file referenced in the HTML page. That page uses JavaScript to get its contents, so you can't get those with just cheerio or request. – limekin Oct 06 '15 at 02:13
  • Try checking the `main.js` script of the page, it's responsible for populating the page with content. So maybe you can just get the data straight from that url instead of trying to scrape the loaded data from the page. – limekin Oct 06 '15 at 02:28
  • Ok, I'll try that. Thanks for your help, I'll update this if it works! – loldi Oct 06 '15 at 17:16
  • 2
    Turns out cheerio + request does NOT work for dynamic web pages. So I just scraped the JSON data I was pulling to make the website, and had it display values based on that, rather than having it scrape the HTML page. – loldi Oct 10 '15 at 19:09

0 Answers0