0

trying capturing all the <a> in a page

the console.log returns undefined, but i can't understand why is this const anchors = Array.from(document.querySelectorAll(sel)); correct?

const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({
                                            headless: false,
                                            userDataDir: "C:\\Users\\johndoe\\AppData\\Local\\Google\\Chrome\\User Data\\Default"
                                        });
  const page = await browser.newPage();
  await page.setViewport({
    width: 1920,
    height: 1080,
    deviceScaleFactor: 1,
  });
  await page.goto('https://www.facebook.com/groups/632312010245152/members');
  
  //https://github.com/puppeteer/puppeteer/blob/main/examples/search.js
  let membri = await page.evaluate((sel) => { 
    const anchors = Array.from(document.querySelectorAll(sel));
    return anchors;
  }, 'a');
  console.log(membri);
})();
isherwood
  • 58,414
  • 16
  • 114
  • 157
pinale
  • 2,060
  • 6
  • 38
  • 72
  • 2
    See https://stackoverflow.com/questions/55017057/puppeteer-returning-empty-object/55032557#55032557 – vsemozhebuty Jan 12 '21 at 23:53
  • 1
    thanks, i got and returned an attribute(href) of the elements, in order to have a serializable Array `const serializableLinks = anchors.map(x => x.getAttribute("href")); //<-- convert to string` – pinale Jan 13 '21 at 20:19
  • 1
    Bear in mind that `x.getAttribute("href")` may return relative URLs. If you need absolute URLs, use `x.href` instead. – vsemozhebuty Jan 13 '21 at 20:42

2 Answers2

1
const findLinks = await page.evaluate(() =>
  Array.from(document.querySelectorAll("a")).map((info) => ({
    information: info.href.split()
  }))
);
links = [];
findLinks.forEach((link) => {
  if (link.information.length) {
    links.push(link.information);
  }
});
await console.log(links);
await page.close();
return links;

Not sure if this is the most optimized solution, but it works. If you could message me a cleaned version of this code I would highly appreciate that :)

Kyle
  • 77
  • 5
-1
const arrayList = await page.evaluate(() => {
const nodeListLinks = document.querySelectorAll('a'),
      array = [...nodeListLinks],
      list = array.map(({href}) => ({href}))
return arrayList
})

console.log(arrayList)
Papito
  • 11
  • 1
  • 2
    Please don't post only code as answer, but also provide an explanation what your code does and how it solves the problem of the question. Answers with an explanation are usually more helpful and of better quality, and are more likely to attract upvotes. – Mark Rotteveel Nov 05 '21 at 16:11
  • While this code may answer the question, providing additional context regarding how and/or why it solves the problem would improve the answer's long-term value. You can find more information on how to write good answers in the help center: https://stackoverflow.com/help/how-to-answer . Good luck – nima Nov 07 '21 at 13:53