I'm new at Node.js and promises (here, I'm using Q.js). I'm trying to make a scraper out of a site which has the following structure:
- main_page: has a list of categories, each category has a link that points to a list of stores page.
- list of stores page: has a list of stores, each store has a link that points to a store details page.
- store detail page: has the data that I'm looking for.
I made a first working approach without promises, but as a result the code was very ugly. So I think this is a good case to use promises.
I cannot get this approach to work. When the second loop is done, the app doesn't continue (it never executes the end() method). Besides, I don't know how to attach the third loop.
How could I do it?
function get(url) {
var deferred = Q.defer();
requestify.get(url).then(function(response) {
deferred.resolve(cheerio.load(response.getBody()));
});
return deferred.promise;
}
function process_main_page($) {
var promises = [];
$('.categories a').each(function(i) {
var deferred = Q.defer();
var storesList = $('.store');
get($(this).attr('href')).then(function($) {
deferred.resolve(process_stores_list(storesList));
});
promises.push(deferred);
});
return Q.all(promises);
}
function process_stores_list(storesList) {
var promises = [];
storesList.each(function() {
// Here I need to make another ajax call for each store detail page, which has the data that I need.
promises.push(deferred);
});
return Q.all(promises);
}
function end(res) {
var deferred = Q.defer();
fs.writeFile('output.json', JSON.stringify(myGatheredData, null, 4), function(err) {
deferred.resolve(function() {
res.send('File successfully written! - Check your project directory for the output.json file');
});
});
return deferred.promise;
}
app.get('/', function(req, res) {
get(url).then(process_main_page).then(end);
});