I'm using cheerio
and axios
to get all links from a web page and making GET requests with axios
with each link to check that it is a not broken link (i.e. the status code is 200).
Then, I want to put each broken link URL into an array.
I can't figure out how this works in term of async
and await
.
const cheerio = require("cheerio");
// returns an array of broken link urls
async function getAllBrokenLinks(url) {
const res = await axios.get(url) // get webpage
// if there was an error getting the data, return null
if (!res || res.status != 200) {
return null;
}
let body = res.data.body.value; // get body html from res
let results = [];
$ = cheerio.load(body); // parse body html into cheerio
let links = $('a'); // get all the links in the body
$(links).each( (i, link) => {
let linkText = $(link).text();
let linkHref = $(link).attr('href');
// check if the link is dead
axios.get(linkHref)
.then( (response) => {
if (response.status != 200) {
results.push(linkHref);
}
})
.catch( (error) => {
results.push(linkHref);
});
})
return results; // this returns an empty array instead of a populated array
}
I know that axios.get()
is an async function but I'm not sure how I can wait for each GET request in the each
loop to finish running before I return the populated array results
.
EDIT: This is different from Using async/await with a forEach loop because $(links)
does not have a .forEach
function.
EDIT: I tried this:
const reqs = $(links).map( async (i, link) => {
let linkText = $(link).text();
let linkHref = $(link).attr('href');
// check if the link is dead
axios.get(linkHref)
.then( (response) => {
if (response.status != 200) {
results.push(linkHref);
}
})
.catch( (error) => {
results.push(linkHref);
});
return axios.get(linkHref)
})
await Promise.all(reqs); // ERROR
return results;
but it is now giving me this error:
(node:77480) UnhandledPromiseRejectionWarning: TypeError: object is not iterable (cannot read property Symbol(Symbol.iterator))
EDIT: I figured it out. Here is my code:
// put each link text and href in the linkUrls array
$(links).each( (i, link) => {
linkUrls[String($(link).text())] = $(link).attr('href');
})
let results = {};
results['title'] = pageTitle;
let num = 0;
for (const linkText in linkUrls) {
const response = await axios.get(linkUrls[linkText])
.then( (r) => {
if (r.status != 200) {
num += 1;
results[ 'broken link ' + String(num)] = {
'text': linkText,
'url': linkUrls[linkText],
'status code': r.status
}
}
})
.catch( (err) => {
num += 1;
results[ 'broken link ' + String(num)] = {
'text': linkText,
'url': linkUrls[linkText],
'status code': err.response.status
}
})
}
console.log('results: ', results);
return results;