1

I'm aware of closures and callbacks in JavaScript but it's obvious I don't get them on an intuitive level.

I have a small app that is scraping data from an API and I can easily console.log the responses from each request, my problem is I'm trying to gather the data and build an object to save to a file when all requests are complete.

I get that nodejs is a single thread of execution and it doesn't block but I can't figure out where to put the call backs when all the inner requests are finished I can console.log the built object. You'll see my console.log lines are in the wrong place and execute before the first response from the inner request.

Breakdown

  1. Fetch country data
  2. Loop over countryResponse and use each country id to fetch details
  3. Add each detail to an array
  4. Add array to object when all requests are complete.

code

const limit = require("simple-rate-limiter");

let request = limit(require("request")).to(1).per(200);


let options = {
    method: 'POST',
    url: 'https://myendpoint/details',
    headers: {
        'cache-control': 'no-cache',
        'Content-Type': 'application/json'
    },
    body: {
        "token": "TOKEN",
        "method": "countries"
    },
    json: true
};

global.package = {};
global.services = {};
let countryServices = [];

/*
    Country fetch
*/
request(options, function (err, response, countryResponse) {
    if (err) {}

    package.countries = countryResponse;

    countryResponse.forEach(function (entry) {

        let innerOptions = {
            method: 'POST',
            url: 'https://myendpoint/details',
            headers: {
                'cache-control': 'no-cache',
                'Content-Type': 'application/json'
            },
            body: {
                "token": "TOKEN",
                "method": "services"
            },
            json: true
        };
        //THIS LINE OMG
        //let countryServices = [];

        innerOptions.body.countryCode = entry.countryCode;

        request(innerOptions, function (err, response, innerResponse) {
            if (err) {}

            countryServices.push(innerResponse);
            console.log(" inner response " + entry.countryCode + ' : ' + JSON.stringify(innerResponse, null, ""));

        });//END innerResponse
    });//END countryResponse.forEach
    services = countryServices;
    console.log(JSON.stringify(package, null, ""));
    console.log(JSON.stringify(countryServices, null, ""));
});//END orderResponse

countryResponse

[
    {
        "countryCode": 1,
        "countryName": "Virgin Islands (U.S.)"
    },
    {
        "countryCode": 7,
        "countryName": "Russian Federation"
    }
]

innerResponse

[
    {
        "Type": "1",
        "id": 2
    },
    {
        "Type": "2",
        "id": 3
    }
]
chris loughnane
  • 2,648
  • 4
  • 33
  • 54

2 Answers2

1

The most concise and straightforward way to do this may be async/await way. You can manually promisify request and replace simple-rate-limiter dependency with simple delay:

'use strict';

const request = require('request');

function promisifiedRequest(options) {
  return new Promise((resolve, reject) => {
    request(options, (err, response, body) => {
      if (err) reject(err);
      else resolve(body);
    });
  });
}

function delay(ms) {
  return new Promise((resolve) => { setTimeout(resolve, ms); });
}

const options = {
  method: 'POST',
  url: 'https://myendpoint/details',
  headers: {
    'cache-control': 'no-cache',
    'Content-Type': 'application/json',
  },
  body: {
    token: 'TOKEN',
    method: 'countries',
  },
  json: true,
};

(async function main() {
  try {
    const countryResponse = await promisifiedRequest(options);

    const innerRequests = [];
    for (const entry of countryResponse) {
      const innerOptions = {
        method: 'POST',
        url: 'https://myendpoint/details',
        headers: {
          'cache-control': 'no-cache',
          'Content-Type': 'application/json',
        },
        body: {
          token: 'TOKEN',
          method: 'services',
          countryCode: entry.countryCode,
        },
        json: true,
      };

      const innerRequest = promisifiedRequest(innerOptions);
      innerRequests.push(innerRequest);
      await delay(200);
    }

    const countryServices = await Promise.all(innerRequests);
    console.log(JSON.stringify(countryServices, null, ''));
  } catch (err) {
    console.error(err);
  }
})();

These materials can be helpful if you want more background or need to scale your app (add parallel requests with more complicated rate limits):

Stackoverflow: How do I return the response from an asynchronous call?

Stackoverflow: Why is my variable unaltered after I modify it inside of a function?

Handling asynchronous operations in parallel

Back-off and retry using JavaScript arrays and promises

vsemozhebuty
  • 12,992
  • 1
  • 26
  • 26
  • 1
    Thank you for that, I definitely have a lot to absorb there and I see a lot of reading ahead. One thing I noticed, in my naive approach it completes it's run ~19 seconds but your solution is ~85 seconds. Is this because yours is async and it will only make a request after the previous request is complete + 200ms delay; where as mine will fire of a request once ever 200ms? – chris loughnane Feb 20 '19 at 16:25
  • Yes. I can try to edit to achieve the same behavior. I will add a comment a bit later. – vsemozhebuty Feb 20 '19 at 16:58
  • Can you try the new variant with some tiny changes? – vsemozhebuty Feb 20 '19 at 17:13
  • 1
    Yeah, down to ~20s! :) It took me a minute to see that you constructed an array of `promisifiedRequest`s and then called `Promise.all` on that. Thank you so much, I have a lot to mull over and will be adding more nested calls depending on responses. – chris loughnane Feb 20 '19 at 17:31
  • 1
    @chrisloughnane Happy coding) – vsemozhebuty Feb 20 '19 at 17:33
1

The console.logs at the end of your code won't wait for all the asynchronous operations fired off by your forEach to complete before they run. You'll need to introduce some kind of mechanism that waits for all the functions fired by forEach to complete their requests.

If you want to stick with using callbacks, then you could take a look at using something like the each method of async, which will handle this kind of situation for you.

This problem is commonly handled using Promises and async/await. If you used a promise based interface to request, your example would look something like this, assuming a fairly up to date version of Node.js (options omitted):

const request = require('request-promise');

async function run() {
  const options = {};
  const countryServices = [];
  const countryResponse = await request(options);

  for (const country of countryResponse) {
    const innerOptions = {};
    const innerResponse = await request(innerOptions);
    countryServices.push(innerResponse);
  }

  console.log(countryServices);
}

run();

This is a bit clearer than using callbacks, and the for-of loop behaves exactly how you would expect.