I am a pythonist and very new to puppeteer and JavaScript. I am trying to webscrape a page and get some specific links out of that page and save those links in an array. I want this array to be outside the function and global. Here is my python code for this:
base_url ="https://www.blablabla.com"
links = []
for a in soup.find_all('a',attrs={'class':"o-job-card"}, href=True):
links.append( base_url + a['href'] )
But my boss wants me to do the same thing with puppeteer. Anyhow I have come up with the solution below. But there is something wrong: I can console.log(my_links[i]) and see the links but I can not links.push(my_links[i]); and I do not understand why... Can somebody explain this to me?
Here is the whole code :
const puppeteer = require('puppeteer');
async function main() {
try {
const browser = await puppeteer.launch();
const [page] = await browser.pages();
await page.goto('https://www.blablabla.com');
return await page.evaluate(() =>
Array.from(document.querySelectorAll('a.o-job-card[href]'), (a) => a.getAttribute('href'))
);
} catch (err) {
console.error(err);
}
}
let links = [];
var txt = 'https://www.blablabla.com';
let userToken = main();
userToken.then(function (my_links) {
for (i = 0; i < my_links.length; i++) {
my_links[i] = txt + my_links[i];
links.push(my_links[i]);
}
});
console.log(links);