I've a page like this one:
<html>
<body>
<table>
<thead>
<tr>
<th>Link</th><th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="https://www.google.com">Google</a></td><td>Search engine</td>
</tr>
<tr>
<td><a href="https://github.com">Github</a></td><td>Code management</td>
</tr>
</tbody>
</table>
</body>
</html>
I would like to parse every row of the table and follow each link (to get the HTML's page title) to create an array of sites like this one:
[ { name: 'Google',
title: 'Google',
descr: 'Search engine' },
{ name: 'Github',
title: 'GitHub ยท Where software is built',
descr: 'Code management' } ]
I thought this is a good example to start learning using Promises and the Q library, but I failed to grasp how promises works. Below the code that I wrote:
var request = require('request');
var cheerio = require('cheerio');
var Q = require('q');
var sites = [];
var loadPage = function(url){
var deferred = Q.defer();
request(url, function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
deferred.resolve($);
} else {
deferred.reject(new Error(error));
}
});
return deferred.promise;
}
var parseRows = function($){
var promises = [];
$("tbody tr").each(function(){
var $cells = $('td', this);
var $firstC = $cells.eq(0);
var name = $firstC.text();
var link = $firstC.find('a').attr('href');
var descr = $cells.eq(1).text();
promises.push(Q.fcall(function () {
var site = {name: name, descr: descr};
loadPage(link).then(function($){
var title = $("title").text();
console.log(title);
// here I don't know how to set the title
// as obj's attribute
});
return site;
}));
});
return Q.all(promises);
}
var displayTitles = function(res){
for (var i = 0, len = res.length; i < len; i++) {
var obj = res[i];
}
return Q.fcall(function () {
return sites;
});
}
loadPage('http://127.0.0.1/sample.html')
.then(parseRows)
.then(displayTitles)
.done();
I'm satisfied with loadPage function but I'm stuck with parseRows, beacuse I'm not able to set the title as property of "site" object. Moreover displayTitles was initially developed to handle the logic required to get the page's title, but now is almost useless.
How can I modify the code above in order to get the desired array as output in a more clean and readable way?