I'm new to JavaScript and I'm having trouble with promises. I'm using cloudscraper to retrieve a webpage's html to scrape data from. I have a simple function - getData() - which calls cloudscraper.get() and passes the html to the extract() function, which is responsible for scraping data. This is the working code:
const getData = function(pageUrl) {
var data;
return cloudscraper.get(pageUrl)
.then(function(html) {
data = extract(html);
return data;
})
.catch(function(err) {
// handle error
})
}
The "data" object returned contains an array of URLs I want to connect to, in order to retrieve other information. That information has to be stored in the same data object. So I want to call cloudscraper.get() method again for each URL contained in the array. I've tried the code below:
const getData = function(pageUrl) {
var data;
// first cloudscraper call:
// retrieve main html
return cloudscraper.get(pageUrl)
.then(function(html) {
// scrape data from it
data = extract(html);
for (let i = 0; i < data.array.length; ++i) {
// for each URL scraped, call cloudscraper
// to retrieve other data
return cloudscraper.get(data.array[i])
.then(function(newHtml) {
// get other data with cheerio
// and stores it in the same array
data.array[i] = getNewData(newHtml);
})
.catch(function(err) {
// handle error
})
}
return data;
})
.catch(function(err) {
// handle error
})
}
but it doesn't work, because the data object is returned before the promises in the loop are resolved. I know that probably there is a simple solution, but I couldn't figure it out, so could you please help me? Thanks in advance.