I am running a server using Node.js and need to request data from another server that I am running (localhost:3001). I need to make many requests (~200) to the data server and collect the data (response sizes vary from ~20 KB to ~20 MB). Each request is independent, and I would like to save the responses as one giant array of the form:
[{"urlAAA": responseAAA}, {"urlCCC": responseCCC}, {"urlBBB": responseBBB}, etc ]
Notice that the order of the items is unimportant; ideally they should fill the array in the order that the data becomes available.
var express = require('express');
var router = express.Router();
var async = require("async");
var papa = require("papaparse");
var sync_request = require('sync-request');
var request = require("request");

var pinnacle_data = {};
var lookup_list = [];
for (var i = 0; i < 20; i++) {
    lookup_list.push(i);
}

function write_delayed_files(object, key, value) {
    object[key] = value;
    return;
}

// Fetch one file from the data server and store it under a "file_<n>" key.
var show_file = function (file_number) {
    var file_index = Math.round(Math.random() * 495) + 1;
    var pinnacle_file_index = 'http://localhost:3001/generate?file=' + file_index.toString();
    // Synchronous (blocking) request.
    var response_json = sync_request('GET', pinnacle_file_index);
    var pinnacle_json = JSON.parse(response_json.getBody('utf8'));
    var object_key = "file_" + file_number.toString();
    pinnacle_data[object_key] = pinnacle_json;
    console.log("We've handled file: " + file_number);
    return;
};

async.each(lookup_list, show_file, function (err) {});

console.log(pinnacle_data);

/* GET contact us page. */
router.get('/', function (req, res, next) {
    res.render('predictionsWtaLinks', {title: 'Async Trial'});
});

module.exports = router;
Now when this program is run it displays:
We've handled file: 0
We've handled file: 1
We've handled file: 2
We've handled file: 3
We've handled file: 4
We've handled file: 5
etc
Now, as the files are of such variable size, I was expecting this to perform the requests "in parallel", but it seems to perform them sequentially, which is what I was trying to avoid by using async.each(). Currently it takes about 1-2 s to connect to the data server, so performing this over many files is taking too long.
I realise I am using synchronous requests, and so would ideally like to replace:
var response_json = sync_request('GET', pinnacle_file_index);
with something similar to:
request(pinnacle_file_index, function (error, response, body) {
    if (!error && response.statusCode == 200) {
        pinnacle_data[object_key] = JSON.parse(body);
    }
});
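Presumably that snippet would also need some way of signalling when each request has finished, so that the final callback of async.each only fires once all the data is in. This is roughly how I imagine it being wired up (untested sketch; it reuses lookup_list, pinnacle_data and request from the code above):

// Untested sketch: each iteratee gets its own callback ("done") and only
// calls it once the HTTP response has arrived, so all the requests can be
// in flight at the same time.
async.each(lookup_list, function (file_number, done) {
    var file_index = Math.round(Math.random() * 495) + 1;
    var url = 'http://localhost:3001/generate?file=' + file_index;
    request(url, function (error, response, body) {
        if (!error && response.statusCode == 200) {
            pinnacle_data["file_" + file_number] = JSON.parse(body);
            done();
        } else {
            done(error || new Error("Status " + response.statusCode));
        }
    });
}, function (err) {
    // Runs once every request has completed (or one has failed).
    if (err) {
        console.log("A request failed: " + err);
    } else {
        console.log(pinnacle_data);
    }
});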
Any help would be much appreciated.
Additionally I have looked at trying:
- Converting the list of URLs into a list of anonymous functions and using
async.parallel(function_list, function (err, results) { /* add results to pinnacle_data */ });
(I have encountered problems trying to define a unique function for each element in the array; see the sketch below.)
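For example, this is roughly what I was attempting with async.parallel (untested sketch; function_list is built by closing over each URL with .map):

// Untested sketch: build one task function per item by closing over its URL,
// then hand the whole list of tasks to async.parallel.
var function_list = lookup_list.map(function (file_number) {
    var file_index = Math.round(Math.random() * 495) + 1;
    var url = 'http://localhost:3001/generate?file=' + file_index;
    return function (callback) {
        request(url, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                callback(null, JSON.parse(body));
            } else {
                callback(error || new Error("Status " + response.statusCode));
            }
        });
    };
});

async.parallel(function_list, function (err, results) {
    // results is an array of parsed bodies, in the same order as function_list.
    if (!err) {
        console.log("Fetched " + results.length + " files.");
    }
});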
Similarly I have looked at other related topics:
- I have tried to mimic the suggested solutions from "Asynchronous http calls with nodeJS", with no progress.
- "How to do parallel async multiple requests at once with Promises in Node"
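From that last topic I gather the Promise-based equivalent would be along these lines (untested sketch; fetch_file is just an illustrative helper that wraps request in a Promise):

// Untested sketch: wrap each request in a Promise and wait on all of them.
function fetch_file(url) {
    return new Promise(function (resolve, reject) {
        request(url, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                resolve(JSON.parse(body));
            } else {
                reject(error || new Error("Status " + response.statusCode));
            }
        });
    });
}

var promises = lookup_list.map(function (file_number) {
    var file_index = Math.round(Math.random() * 495) + 1;
    return fetch_file('http://localhost:3001/generate?file=' + file_index);
});

Promise.all(promises)
    .then(function (results) {
        console.log("Fetched " + results.length + " files.");
    })
    .catch(function (err) {
        console.log("A request failed: " + err);
    });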
EDIT - WORKING SOLUTION
The following code now does the task (taking ~80 ms per request, including having to make repeated requests using npm requestretry). It also scales very well, with an average request time of ~80 ms whether making 5 requests in total or 1000.
var performance = require("performance-now");
var time_start = performance();
var async = require("async");
var request_retry = require('requestretry');

var lookup_list = [];
var total_requests = 50;
for (var i = 0; i < total_requests; i++) {
    lookup_list.push(i);
}

var pinnacle_data = {};

async.map(lookup_list, function (item, callback) {
    var file_index = Math.round(Math.random() * 495) + 1;
    var pinnacle_file_index = 'http://localhost:3001/generate?file=' + file_index;
    request_retry({
            url: pinnacle_file_index,
            maxAttempts: 20,
            retryDelay: 20,
            retryStrategy: request_retry.RetryStrategies.HTTPOrNetworkError
        },
        function (error, response, body) {
            if (!error && response.statusCode == 200) {
                body = JSON.parse(body);
                // Wrap each response in a single-key object so the results
                // array can be merged into pinnacle_data afterwards.
                var data_array = {};
                data_array[file_index.toString()] = body;
                callback(null, data_array);
            } else {
                console.log(error);
                callback(error || response.statusCode);
            }
        });
}, function (err, results) {
    var time_finish = performance();
    console.log("It took " + (time_finish - time_start).toFixed(3) + "ms to complete " + total_requests + " requests.");
    console.log("This gives an average rate of " + ((time_finish - time_start) / total_requests).toFixed(3) + " ms/request");
    if (!err) {
        // Merge the array of single-key objects into one lookup object.
        for (var i = 0; i < results.length; i++) {
            for (var key in results[i]) {
                pinnacle_data[key] = results[i][key];
            }
        }
        var length_array = Object.keys(pinnacle_data).length.toString();
        console.log("We've got all the data, totalling " + length_array + " unique entries.");
    } else {
        console.log("We had an error somewhere.");
    }
});
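If the number of requests grows much beyond this, I gather async.mapLimit can be dropped in for async.map to cap how many requests are in flight at once, so the data server is not hit with hundreds of simultaneous connections. A minimal sketch under that assumption (the limit of 20 is an arbitrary choice):

// Untested sketch: same shape as the working solution, but at most 20
// requests are in flight at any one time.
var CONCURRENCY_LIMIT = 20;

async.mapLimit(lookup_list, CONCURRENCY_LIMIT, function (item, callback) {
    var file_index = Math.round(Math.random() * 495) + 1;
    request_retry({
        url: 'http://localhost:3001/generate?file=' + file_index,
        maxAttempts: 20,
        retryDelay: 20,
        retryStrategy: request_retry.RetryStrategies.HTTPOrNetworkError
    }, function (error, response, body) {
        if (!error && response.statusCode == 200) {
            var entry = {};
            entry[file_index.toString()] = JSON.parse(body);
            callback(null, entry);
        } else {
            callback(error || response.statusCode);
        }
    });
}, function (err, results) {
    // results is merged into pinnacle_data exactly as in the code above.
});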
Thanks for the help.