I have been trying to scrape some data from Wikipedia using nodejs. i used request-promise and cheerio for the same. The first then block in the requestPromise works as expected and returns once the whole code logic above has been completed. But for the second then block i used two methods - M-1 and M-2, but both the code despite of being blocking returns the data and thus the third then block executes and console logs undefined, i dont undertsand why it returns before completing the promise
let cheerio = require('cheerio')
let requestPromise = require('request-promise')
//Website to be scraped
const url = "https://en.wikipedia.org/wiki/List_of_Presidents_of_the_United_States"
requestPromise(url)
.then( html => {
let wikiLinks = []
let obj = cheerio('big > a', html)
for (let key in obj){
if(obj[key].attribs){
wikiLinks.push(obj[key].attribs.href)
}
}
return wikiLinks
})
.then( links => {
//M-1
let data = []
let info
links.forEach(async link => {
info = await getAllBirthdayData(link)
data.push(info)
})
return data ==> returns []
//M-2
return Promise.all([
links.forEach(link => {
return getAllBirthdayData(link)
})
])
//M-3
return await Promise.all([
links.map( async link => {
return await getAllBirthdayData(link) ==> returns pending
promises only
})
])
})
.then(finalData => {
console.log(finalData)
})
.catch(err => {
console.log("error 1")
})
let getAllBirthdayData = (url) => {
return requestPromise("https://en.wikipedia.org/" + url)
.then( html => {
return {
name : cheerio('.firstHeading', html).text(),
birthday : cheerio('.bday', html).text()
}
})
.catch( err => {
console.log("error 2")
})
}
I expect the output to be an array of objects with key-value pairs as such
[{name : something, birthday : 2018-01-01},
{name : something2, birthday : 2018-01-02}]