1

I'm trying to learn web scraping with Node.js. I chose a sample page, for example http://www.imdb.com/chart/top, and tried to scrape all the titles together with their ratings. I created a PhantomJS script (I need PhantomJS because the page is dynamic and relies on JavaScript). It works, but I don't know how to loop over each title.

For example:

// Visit every row of the chart table and read its title and rating text
// with newlines stripped. The values are computed but not stored anywhere.
$('.lister-list tr').each(function () {
  var row = $(this);
  row.find('.titleColumn').text().replace(/\n/g, '');
  row.find('.imdbRating').text().replace(/\n/g, '');
});

Then I write it all to a JSON file. At the moment I can only save the data for a single row, without a loop. This is my script:

var phantom = require('phantom');
var fs = require('fs');

// Opens the IMDb top chart in PhantomJS, reads the title and rating of the
// first table row inside the page, and appends them to java.json as JSON.
phantom.create(function (ph) {
  ph.createPage(function (page) {
    page.open("http://www.imdb.com/chart/top", function (status) {
      // Stop early when the page did not load, instead of evaluating against
      // a page that never arrived and appending a bogus record to the file.
      if (status !== 'success') {
        console.error('Unable to open page, status: ' + status);
        ph.exit();
        return;
      }

      page.evaluate(function () {
        // This function is executed in the page; it relies on the page
        // providing jQuery as `$`.

        // extract the data with jQuery (first row only)
        var k_title = $('.lister-list tr .titleColumn').first().text().replace(/\n/g, '');
        var k_rating = $('.lister-list tr .imdbRating').first().text().replace(/\n/g, '');

        // create json data
        var metadata = JSON.stringify({
          Title: k_title,
          Rating: k_rating
        });

        return metadata;

      }, function (result) {
        // save json data, one record per line
        fs.appendFile('java.json', "\n" + result, function (err) {
          if (err) throw err;
          console.log('file is updated!');
        });
        // display data in console
        console.log('Result: ' + result);
        ph.exit();
      });
    });
  });
});

How can I make a loop that iterates over all TR elements?

Artjom B.
  • 61,146
  • 24
  • 125
  • 222
Ponciusz
  • 147
  • 3
  • 18

1 Answer

2

What you need is an array. You could initialize an empty array and push new objects onto it like this:

page.evaluate(function () {
    var metadataList = [];
    $('.lister-list tr').each(function(){
        var metadata = {
            Title: $(this).find('.titleColumn').text().replace(/\n/g, ''),
            Rating: $(this).find('.imdbRating').text().replace(/\n/g, '')
        };
        metadataList.push(metadata);
    });

    return JSON.stringify(metadataList);
}, function(result){ ... });

You could also use the jQuery map() function to map each row to an object and create an array that way:

page.evaluate(function () {
    var metadataList = $('.lister-list tr').map(function(){
        return {
            Title: $(this).find('.titleColumn').text().replace(/\n/g, ''),
            Rating: $(this).find('.imdbRating').text().replace(/\n/g, '')
        };
    }).get();

    return JSON.stringify(metadataList);
}, function(result){ ... });

Note that get() must be called on the map() result to retrieve the actual array rather than the jQuery object.

You don't need jQuery to do this:

page.evaluate(function () {
    var metadataList = [];
    [].forEach.call(document.querySelectorAll('.lister-list tr'), function(tr){
        var metadata = {
            Title: tr.querySelector('.titleColumn').textContent.replace(/\n/g, ''),
            Rating: tr.querySelector('.imdbRating').textContent.replace(/\n/g, '')
        };
        metadataList.push(metadata);
    });

    return JSON.stringify(metadataList);
}, function(result){ ... });
Artjom B.
  • 61,146
  • 24
  • 125
  • 222