I created a simple scraper using cheerio and request client but it doesn't work the way I want.
First I see all the "null returned, do nothing" messages on the terminal and then see the names, so I think it first checks all the urls that returns a null, then non-nulls.
I want it to run in the right order, from 1 to 100.
app.get('/back', function (req, res) {
for (var y = 1; y < 100; y++) {
(function () {
var url = "example.com/person/" + y +;
var options2 = {
url: url,
headers: {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
}
};
request(options2, function (err, resp, body) {
if (err) {
console.log(err);
} else {
if ($ = cheerio.load(body)) {
var links = $('#container');
var name = links.find('span[itemprop="name"]').html(); // name
if (name == null) {
console.log("null returned, do nothing");
} else {
name = entities.decodeHTML(name);
console.log(name);
}
}
else {
console.log("can't open");
}
}
});
}());
}
});