I am trying to scrape multiple URL one by one, then repeat the scrape after one minute.
But I keep getting two errors and was hoping for some help.
I got an error saying:
functions declared within loops referencing an outer scoped variable may lead to confusing semantics
And I get this error when I run the function / code:
TimeoutError: Navigation timeout of 30000 ms exceeded.
My code:
const puppeteer = require("puppeteer");
const urls = [
'https://www.youtube.com/watch?v=cw9FIeHbdB8',
'https://www.youtube.com/watch?v=imy1px59abE',
'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
];
const scrape = async() => {
let browser, page;
try {
browser = await puppeteer.launch({ headless: true });
page = await browser.newPage();
for (let i = 0; i < urls.length; i++) {
const url = urls[i];
await page.goto(`${url}`);
await page.waitForNavigation({ waitUntil: 'networkidle2' });
await page.waitForSelector('.view-count', { visible: true, timeout: 60000 });
const data = await page.evaluate(() => { // functions declared within loops referencing an outer scoped on this line.
return [
JSON.stringify(document.querySelector('#text > a').innerText),
JSON.stringify(document.querySelector('#container > h1').innerText),
JSON.stringify(document.querySelector('.view-count').innerText),
JSON.stringify(document.querySelector('#owner-sub-count').innerText)
];
});
const [channel, title, views, subs] = [JSON.parse(data[0]), JSON.parse(data[1]), JSON.parse(data[2]), JSON.parse(data[3])];
console.log({ channel, title, views, subs });
}
} catch(err) {
console.log(err);
} finally {
if (browser) {
await browser.close();
}
await setTimeout(scrape, 60000); // repeat after one minute after all urls have been scrape.
}
};
scrape();
I would really appreciate any help I could get.