I'm trying to make a simple price checking scraper. I'm struggling with the part of the program that gets updates for the items that are in the database which are over an hour old.
Everything works fine, apart from when I put the URL and the updated prices back into the database as new entries. I get the last URL from the for loop against all the entries, so not much use for future checks!
From what I've read I'm pretty sure its an async issue, but I just can't fathom what I need to do resolve it. Should I be looking at callbacks, promises or something else completely?
connection.query("SELECT * FROM productprice WHERE DATE_ADD(date, INTERVAL 1 HOUR) <= CURRENT_DATE()", function(err, rows, fields){
if (err) throw err;
for (var i in rows) {
request(rows[i].urlsource, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
var priceup, urls;
$('#priceblock_ourprice').filter(function(){
var data = $(this);
priceup = data.text();
urls = rows[i].urlsource;
console.log(rows[i].urlsource, priceup);
})
}
var sql = "INSERT INTO productprice (urlsource, price, date) VALUES (?, ?, ?)";
connection.query(sql, [urls, priceup, now], function (err, result) {
if (err) throw err;
console.log("something got inserted");
});
}) ;//REQUEST
}//FOR LOOP
})//CONNECTION.QUERY
I think it's an async issue and the for loop continuing to loop through the entries while cheerio is still looking at the DOM and getting the price, so that by the time it gets put back into the database it just grabs the last URL value.