I am making a node.js application and part of my code requests for data from 193 different urls to download the json data from each url. Here is one of those urls: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Apeldoorn For the some the downloaded json data is fine and is complete. However towards the end, corruptions happen for some of the files. Part of the data becomes nullified and then there are some that have database errors. I think it has to do with requesting data from so many urls in a short amount of time (which is why I tried the "setTimeout" function (but that doesn't really work)).
function writeToFile(url) {
// get name to make each new file unique
var name = url.split("json/")[1];
var fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);
var options = {
url: `${url}`,
method: 'GET',
headers: {
'Accept': 'application/json',
'Accept-Charset': 'utf-8',
json: true
}
}
//request the data from the site and download to the file.
request.get(options).pipe(fileStream);
}
function getMunicipalityGeoJsonData(req, res) {
//Get all the urls pointing to the JSON data for the province, Gelderland
getGelderlandJsonUrls((err, jsonUrls) => {
//for all those urls, write the data to files.
for (url of jsonUrls) {
console.log(url);
writeToFile(url);
}
})
}
function getGelderlandJsonUrls(callback) {
getMunicipalityJsonUrls("Gelderland", (err, data) => {
jsonUrls = data;
callback(null, jsonUrls);
});
}
function getMunicipalityJsonUrls(provinceName, callback) {
request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (error, response, body) => {
body = JSON.parse(body);
// extracting each json URL from all the municipalities in Gelderland
var jsonUrls = [];
var numberMun = body.length;
for (var i = 0; i < numberMun; i++) {
var url = body[i].uri.naam;
var urlSplit = url.split("gemeentenaam");
var jsonUrl = urlSplit[0] + "gemeentenaam/json" + urlSplit[1];
jsonUrl = jsonUrl.replace("http://", "https://");
jsonUrls.push(jsonUrl);
}
callback(null, jsonUrls);
});
}
The last json data downloaded into the file as an html page with a database error from the url: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Zutphen which actually just took just under 6 seconds to load up looking at the network tab on Chrome the 1812 has null for its properties when it should have a bunch of coordinates https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Winssen (took just over a second to load on chrome
I am a noob at node, but please help me fix this issue maybe with some sort of checking if the data is corrupted or something. Thanks for the help in advanced:)
EDIT: I am trying to do up to 200 urls at a time in the for loop.