0

Hi, I am making a web scraper using Node, but the code always returns 'undefined'. I tried using the 'async' module, but the result is the same.

Where is the error?

var request = require('request');
var cheerio = require('cheerio');

// Fetches the movie page at `url` and scrapes its details into a `meta` object.
// NOTE(review): `request.get` completes asynchronously, so the `return meta`
// at the bottom runs before the callback below has filled anything in; the
// caller receives the object with its initial empty-string fields.
var get_info = function (url){
  // Result object; every field starts as an empty string.
  var meta = {
        title   : '',
        year    : '',
        length  : '',
        gen     : '',
        details : '',
        raiting : '',
      };

  request.get(url, function (error, response, html){
    // Only scrape when the request succeeded with HTTP 200; otherwise the
    // fields of `meta` are left untouched.
    if (!error && response.statusCode == 200){
      var $         = cheerio.load(html);
      meta.title    = $('.movie-title h1').find('a').text();
      meta.year     = $('.release').text();
      // Drop the first 'minutos' occurrence from the running-time text.
      meta.length   = $('.running_time').text().replace('minutos', '');
      // Only the first link in the tag list is used.
      meta.gen      = $('.btn-tags-list').find('a').first().text();
      meta.details  = $('.description').find('p').text();
      meta.raiting  = $('.movie-rating-average').find('.average').text();
    };
  });
  // Returned immediately, before the callback above has been invoked.
  return meta;
};

Thanks.

2 Answers

1

You would use a callback since get is an async request:

/**
 * Fetch a movie page and scrape its details.
 *
 * `request.get` is asynchronous, so the result cannot be returned directly;
 * it is delivered through `callback` once the response has arrived.
 *
 * @param {string} url - Address of the movie page to fetch.
 * @param {function(?Object, Error=)} callback - Invoked exactly once.
 *   On success it receives the scraped `meta` object (`title`, `year`,
 *   `length`, `gen`, `details`, `raiting`). On failure (request error or a
 *   status code other than 200) it receives `null` as the first argument and
 *   an `Error` describing the problem as the second.
 */
var get_info = function (url, callback) {
  request.get(url, function (error, response, html) {
    // Always report failures; otherwise the caller would wait forever.
    if (error) {
      callback(null, error);
      return;
    }
    if (response.statusCode !== 200) {
      callback(null, new Error('Unexpected status code ' + response.statusCode + ' for ' + url));
      return;
    }

    var $ = cheerio.load(html);
    var meta = {
      title   : $('.movie-title h1').find('a').text(),
      year    : $('.release').text(),
      // Drop the first 'minutos' occurrence from the running-time text.
      length  : $('.running_time').text().replace('minutos', ''),
      // Only the first link in the tag list is used.
      gen     : $('.btn-tags-list').find('a').first().text(),
      details : $('.description').find('p').text(),
      raiting : $('.movie-rating-average').find('.average').text(),
    };

    callback(meta);
  });
};

And use it:

// The scraped data is only available inside the callback.
get_info(url, function (info) {
  console.log(info);
});
tymeJV
  • 103,943
  • 14
  • 161
  • 157
1

You cannot return a value when dealing with async (you should probably read up on how async works and what callback functions are).

To remedy this, you'll need to provide a callback argument to your method (get_info) which would basically extend the callback chain. Something like:

// Skeleton of the callback-based version: the result is handed to `callback`
// from inside the request handler rather than returned.
var get_info = function(url,callback){
  /* ... */ // (elided: declaration of `meta`)
  request.get(url, function(error, response, html){
    // NOTE(review): `callback` is only invoked on success (no error and
    // HTTP 200); on any other outcome it is never called.
    if (!error && response.statusCode == 200){
      // populate `meta`
      callback(meta);
    }
  });
};

Your implementation would then become:

// Call site: the value is delivered to the callback, not returned.
get_info('foo', function (result) {
  // `result` is the `meta` object passed by get_info; work with it here
});
Brad Christie
  • 100,477
  • 16
  • 156
  • 200