I've written a script in Node.js
to scrape the links
of different titles from a webpage. When I run the script below, I get undefined
printed in the console instead of the links
I'm after. My selectors are accurate.
I do not wish to put the links
in an array and return the results; rather, I wish to print them on the fly. As I'm very new to writing scripts using Node.js
in combination with Puppeteer,
I can't figure out the mistake I'm making.
This is my script (Link to that site):
const puppeteer = require('puppeteer');
/**
 * Opens the Stack Overflow "web-scraping" tag page in a browser launched by
 * Puppeteer and runs a callback in that page which logs the href of every
 * `a.question-hyperlink` element.
 *
 * NOTE(review): the callback handed to page.evaluate() has no return
 * statement, so `url` (and therefore the value this promise resolves with) is
 * undefined. Puppeteer documents that the callback executes in the page
 * context, so its console.log targets the page's console, not Node's stdout.
 *
 * @returns {Promise<undefined>} Settles once the browser has been closed;
 *   rejects with the original error if any Puppeteer call fails.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://stackoverflow.com/questions/tagged/web-scraping");
    const url = await page.evaluate(() => {
      const items = document.querySelectorAll('a.question-hyperlink');
      items.forEach((item) => {
        //would like to keep the following line intact
        console.log(item.getAttribute('href'));
      });
    });
    return url;
  } finally {
    // Always release the browser process, including when navigation or
    // evaluation throws; await it so the promise settles after shutdown.
    await browser.close();
  }
}
// Start the scrape: print the resolved value on success, the error on failure.
run()
  .then((result) => console.log(result))
  .catch((error) => console.error(error));
The following script works just fine if I declare an empty array
results
, store the scraped links in it, and finally return the results
, but I do not wish to do it this way. I would like to stick to the way I tried above, as in printing the result on the fly.
const puppeteer = require('puppeteer');
/**
 * Opens the Stack Overflow "web-scraping" tag page in a browser launched by
 * Puppeteer and collects the href of every `a.question-hyperlink` element.
 *
 * @returns {Promise<Array<{url: (string|null)}>>} One `{ url }` record per
 *   matched link, in document order; rejects with the original error if any
 *   Puppeteer call fails.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://stackoverflow.com/questions/tagged/web-scraping");
    // The callback is evaluated inside the page; only its return value is
    // handed back to this script, so the records are built and returned there.
    const urls = await page.evaluate(() =>
      Array.from(document.querySelectorAll('a.question-hyperlink'), (item) => ({
        url: item.getAttribute('href'),
      })),
    );
    return urls;
  } finally {
    // Always release the browser process, including when navigation or
    // evaluation throws; await it so the promise settles after shutdown.
    await browser.close();
  }
}
// Start the scrape: print the collected links on success, the error on failure.
run()
  .then((links) => console.log(links))
  .catch((error) => console.error(error));
Once again, my question is: how can I print each link with console.log(item.getAttribute('href'));
on the fly, without storing it in an array?