
I am trying to scrape a site to get live football (soccer) information using Node.js and Puppeteer, but my browser closes immediately after the `console.log("trying to select team element")`.

const puppeteer = require("puppeteer");

async function openPage() {
  const browser = await puppeteer.launch( {headless: true} );
  const page = await browser.newPage();
  
  await page.setViewport({ width: 1000, height: 926 });
  await page.goto("https://www.livescore.com/en/");

  // accepting cookies 
  const button = await page.waitForSelector('#onetrust-accept-btn-handler');
  if (button) {
    await button.click();
    console.log("clicked cookie button");
  };
  
  return page;
}

async function scrapeData(page) {
  let content = [];

  // Getting match elements
  let elements = await page.waitForSelector(".Ip")
  console.log("trying to select team element")


  for (let i=0; i < elements.length; i++) {
    let homeTeamElement = await elements[i].$(".Ip")
    if (homeTeamElement) {
      const homeTeamText = await homeTeamElement.evaluate(node => node.textContent);
      content.push(homeTeamText);
    }
  };

  return content;
};

(async () => {
  const page = await openPage();
  const 
```
dataScraped = await scrapeData(page);
  console.log(dataScraped)

  await page.browser().close();
})();

Any idea why this may be happening? Further critique of my code would also be greatly appreciated!

Mannen
  • Recommend adding more `console.logs` for debugging. E.g. Is `elements.length` > 0? What does your `console.log(dataScraped)` print, `[ ]`? – user1160006 Aug 15 '23 at 21:43
  • `await page.waitForSelector(".Ip")` only returns one element, not an array, so there's no way to loop over it. You should see an error message--what does Node print when your browser closes? I think you have an extra ``` in your code. – ggorlen Aug 15 '23 at 23:41

1 Answer


`await page.waitForSelector(".Ip")` only returns one element, not an array, so there's no way to loop over it. There should be a clear error message to this effect. Instead, use `page.$$eval` (or the new locators API if you're feeling cutting edge) to extract the data.

const puppeteer = require("puppeteer"); // ^21.0.2

const url = "<Your URL>";

let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  await page.setViewport({width: 1000, height: 926});
  await page.goto(url, {waitUntil: "domcontentloaded"});

  // not really necessary for the scrape
  const button = await page.waitForSelector("#onetrust-accept-btn-handler");
  await button.click();

  // wait for at least one match row, then extract all rows in one pass
  await page.waitForSelector(".Ip");
  const content = await page.$$eval(".Ip", els =>
    els.map(e => {
      // trimmed text of the first descendant whose id contains `id`
      const text = id =>
        e.querySelector(`[id*=${id}]`).textContent.trim();
      return {
        time: text("status-or-time"),
        home: text("home-team-name"),
        away: text("away-team-name"),
        homeTeamScore: +text("home-team-score"),
        awayTeamScore: +text("away-team-score"),
      };
    })
  );
  console.log(content);
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
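
If you'd rather try the locators API mentioned above, here's a rough sketch, assuming Puppeteer 21+ (the variable name firstMatchText is mine, not part of your code):

// Sketch only: locators wait and retry automatically, so there's no
// separate waitForSelector call before acting on the element.
await page.locator("#onetrust-accept-btn-handler").click();

// A locator resolves to a single element, so $$eval (as above) is still
// the simpler tool for scraping every .Ip row; .map/.wait pull one value.
const firstMatchText = await page
  .locator(".Ip")
  .map(el => el.textContent.trim())
  .wait();
console.log(firstMatchText);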

Notes:

  • Closing the cookie banner isn't necessary for the scrape, but if you do use it, there's no point in checking the return value of `waitForSelector`: it's guaranteed to be the element, because `waitForSelector` throws if it can't find the element in time. If the banner only appears sometimes, handle that throw instead (see the sketch after this list).
  • `await elements[i].$(".Ip")` isn't going to help you access anything, since there's no `.Ip` within the `.Ip` element you're already holding a handle to (as far as I can tell).
  • Avoid ElementHandles: they're slow, unreliable, and verbose.
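
Picking up the first note, here's a minimal sketch of tolerating an optional cookie banner; the 5-second timeout is an assumption, so tune it for your page:

// Sketch: treat a missing banner as non-fatal rather than letting
// waitForSelector's timeout error kill the whole scrape.
try {
  const button = await page.waitForSelector("#onetrust-accept-btn-handler", {
    timeout: 5000, // assumed timeout; adjust as needed
  });
  await button.click();
} catch {
  console.log("no cookie banner found; continuing without it");
}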
ggorlen