0

I am trying to return an array that is populated asynchronously inside a forEach loop. But when I return, I get only undefined.

Below is the code block:

// For every entry in `data`, requests `val.esomar_url` and, once the response
// arrives, pushes { country_name, links } onto `countryPages`.
// NOTE: the array is returned before any of those pushes can have happened
// (assuming Rq returns a standard promise, whose callbacks always run later).
var navigateAndFetchPages = function (data) {
  var countryPages = [];
  data.forEach(function (val) {
    // The value returned by Rq(...).then(...) is discarded here, so nothing
    // outside this callback can wait for the request to finish.
    Rq(val.esomar_url)
      .then(function (data) {
          // This `data` is the value Rq resolved with; it shadows the outer
          // `data` parameter inside the callback.
          var $ = cheerio.load(data),
              pages_elem = $('.mt0.mb0-5.pt0').find('a').not('.active');
          countryPages.push({country_name: val.country_name, links: pages_elem});
    })
  });
  // Executes right after the loop has started the requests, not after they
  // complete, so the caller receives the array while it is still empty.
  return countryPages;
};


// Exposes `extract(dir)`, which chains landingPage(dir) into
// navigateAndFetchPages and returns the resulting promise chain.
var scraper = {
    extract: function (dir) {
        return landingPage(dir)
            .then(function (countries) {
                // Whatever navigateAndFetchPages returns becomes `p` in the next step.
                return navigateAndFetchPages(countries)
            })
            .then(function (p) {
                // `p` is invoked as a function; nothing is returned, so the
                // chain continues with `undefined`.
                p()
            })
            // No handler is passed, so this .catch() does not handle rejections;
            // they still reach whoever consumes the returned promise.
            .catch();
    }
};
Mukul Jain
  • 1,121
  • 11
  • 24
ChanX
  • 362
  • 3
  • 9
  • 26
  • _"But when I return, I get only `undefined`"_ Where is a value returned from `navigateAndFetchPages`? – guest271314 Jan 23 '17 at 06:59
  • sorry, added the return value.. And i know the value is being returned before the async proc could complete.. But I can't also return within the foreach block.. Now i'm stuck what to do.. – ChanX Jan 23 '17 at 07:01
  • Do you need to return the array directly like that, or can you structure your code to use a promise or a callback function? (As per "normal" async processing.) – nnnnnn Jan 23 '17 at 07:07
  • the arguments that the `navigateAndFetchPages` function receiving are an array of urls which i need to traverse to get links which will be returned in an array .. I can return the array directly so that i can chain it in my main function from where `navigateAndFetchPages` is being called. Updated the code with the callee. – ChanX Jan 23 '17 at 07:11
  • Check http://stackoverflow.com/a/14220323/1435132 – Sangharsh Jan 23 '17 at 08:38

2 Answers

1

First things first, please indent your code properly -- it's much easier to follow!

Second, you need to embrace the asynchronicity of your code. Rq, presumably, is an asynchronous function: that makes the whole operation asynchronous. That means that navigateAndFetchPages needs to return promises, rather than trying to return the data.

The simple way to do this is with Array#map.

/**
 * Starts one request per entry and hands back the pending results.
 *
 * @param {Array<Object>} data - entries to fetch; each one is read for its
 *   `esomar_url` and `country_name` properties.
 * @returns {Array} one promise-like value per entry (the result of
 *   `Rq(...).then(...)`), each settling to `{ country_name, links }`.
 */
var navigateAndFetchPages = function(data) {
  // Builds the record for one country from the body Rq resolved with.
  function toCountryRecord(country, html) {
    var $ = cheerio.load(html);
    var pageLinks = $('.mt0.mb0-5.pt0').find('a').not('.active');
    return { country_name: country.country_name, links: pageLinks };
  }

  return data.map(function(country) {
    var request = Rq(country.esomar_url);
    return request.then(function(html) {
      return toCountryRecord(country, html);
    });
  });
};

The result of a call to navigateAndFetchPages will now be an array of promises.

You can wait for them all to complete with Promise.all:

// Collect the per-entry promises first, then wait for all of them together.
var pendingPages = navigateAndFetchPages(data);
Promise.all(pendingPages).then(function(pages) {
  // pages is an array holding the resolved object for each entry
});

Your updated question shows that this is itself called in a chain of promises. This is easy to deal with:

var scraper = {
    /**
     * Loads the landing page for `dir`, then fetches the pages of every
     * country it lists.
     *
     * @param {*} dir - passed straight through to landingPage.
     * @returns {Promise<Array>} resolves with one result per country, as
     *   produced by the promises navigateAndFetchPages returns; rejects if
     *   landingPage or any single request rejects.
     */
    extract: function (dir) {
        // No .catch() here: a .catch() without a handler handles nothing, and
        // the caller is the right place to decide what to do with a failure.
        return landingPage(dir).then(function (countries) {
            // Promise.all collapses the array of per-country promises into a
            // single promise for the array of results. That array is the
            // resolved value; it is data, not something to invoke.
            return Promise.all(navigateAndFetchPages(countries));
        });
    }
};

You could put Promise.all inside the navigateAndFetchPages function, but I wouldn't, in case you need to access individual pages' data in future. It's your API, though, so that's up to you.

lonesomeday
  • 233,373
  • 50
  • 316
  • 318
-1

You can replace .forEach() with .map() wrapped in Promise.all(). Return an object from each .then() callback; those objects become the elements of the array that is passed to the .then() chained to Promise.all().

/**
 * Requests every entry's page and waits for all of them.
 *
 * @param {Array<Object>} data - entries to fetch; each one is read for its
 *   `esomar_url` and `country_name` properties.
 * @returns {Promise<Array<Object>>} resolves with one `{ country_name, links }`
 *   object per entry; rejects as soon as any single request rejects.
 */
var navigateAndFetchPages = function(data) {
  var requests = data.map(function(entry) {
    var fetched = Rq(entry.esomar_url);

    return fetched.then(function(body) {
      var $ = cheerio.load(body);
      var pagerLinks = $('.mt0.mb0-5.pt0').find('a').not('.active');

      return {
        country_name: entry.country_name,
        links: pagerLinks
      };
    });
  });

  return Promise.all(requests);
};

// The function has to be invoked: `.then()` lives on the promise it returns,
// not on the function object itself. `data` is the array of entries to fetch.
navigateAndFetchPages(data)
  .then(function(countryPages) {
    // do stuff with `countryPages`
  })
  .catch(function(err) {
    // Reached when any single request (or the handler above) fails.
    console.log(err);
  });
guest271314
  • 1
  • 15
  • 104
  • 177