1

I am trying to scrape the inner text of a div, selected by class, that is nested within multiple other divs. The website's code looks like this:

<div class="time time-tile"><div class="js-summary"><div>
            <div class="time-summary">
                <div class="time-summary-left time-summary-grid">
                    <div class="time-summary-left-top">
                        <div class="booking-start-time-label">4:22pm</div>
                        <div class="facility-name-label">
                            <span class="js-booking-side-name"> </span>
                        </div>
                    </div>
                    <div>
                        <div class="booking-slot-details">
                            <span class="booking-slot-holes js-booking-slot-holes">
                                <i class="glyphicon glyphicon-flag"></i>
                                <span>18</span>
                            </span>
                            <span class="booking-slot-players js-booking-slot-players">
                                <i class="glyphicon glyphicon-user"></i>
                                <span>4</span>
                            </span>
                        </div>
                    </div>
                </div>
                <div class="time-summary-right time-summary-grid">
                    <div>
                         <div class="booking-slot-pricing-information js-booking-slot-pricing">
                            <span class="js-booking-green-fee" title="Green Fee">
                                <i class="icon-golf-ball-tee"></i>
                                $34.50
                            </span>
                        </div>
                    </div>
                    <div>
                         <div class="booking-slot-pricing-information js-booking-slot-pricing">
                            <span class="js-booking-cart-fee" title="Cart Fee" style="display: none;">
                                <i class="icon-golf-cart"></i>
                                $0.00
                            </span>
                        </div>
                    </div>
                </div>
            </div>
            
        </div></div></div>

I currently have:

const puppeteer = require('puppeteer');

/**
 * Launches a browser, opens the tee-times page and logs the text content of
 * the first element matching `.booking-start-time-label`.
 *
 * The browser is closed in a `finally` block, so a failed navigation or a
 * selector timeout does not leave a browser process running.
 *
 * @returns {Promise<void>} Resolves once the text has been logged and the
 *   browser has been closed.
 * @throws Rejects with the error raised while loading or querying the page
 *   (for example the `waitForSelector` timeout), after the browser is closed.
 */
async function scrape() {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto("https://foreupsoftware.com/index.php/booking/19671/2149#/teetimes");

        // waitForSelector resolves to the element it found, so a second
        // page.$ lookup for the same selector is unnecessary.
        const element = await page.waitForSelector('.booking-start-time-label');
        const value = await page.evaluate(el => el.textContent, element);
        console.log(value);
    } finally {
        // Always release the browser, including when a step above throws.
        await browser.close();
    }
}

// Handle the rejection explicitly so a failed run is reported and the
// process exits with a non-zero status instead of leaving a floating promise.
scrape().catch(err => {
    console.error(err);
    process.exitCode = 1;
});

I am trying to scrape and log the inner text of the div with the class name "booking-start-time-label". The error I keep getting is "TimeoutError: Waiting for selector .booking-start-time-label failed: Waiting failed: 30000ms exceeded at Timeout."
The site is https://foreupsoftware.com/index.php/booking/19671/2149#/teetimes

ggorlen
  • 44,755
  • 7
  • 76
  • 106
Glix
  • 11
  • 2
  • Looks like you're being detected as a bot. If you use `puppeteer.launch({headless: false})` or add a user agent it should work. See [Why does headless need to be false for Puppeteer to work?](https://stackoverflow.com/a/70936552/6243352). BTW, [`waitForSelector` returns the element it found](https://serpapi.com/blog/puppeteer-antipatterns/#not-using-the-return-value-of-waitforselector-and-waitforxpath). – ggorlen Feb 06 '23 at 22:23
  • Got it to work by using headless: false, now just need to create a function and use $$ to scrape all. Thank you!! – Glix Feb 06 '23 at 22:52
  • No problem, thanks for the full code and site. That last bit should be something like `page.$$eval(".booking-start-time-label", els => els.map(e => e.textContent))`. – ggorlen Feb 06 '23 at 22:54

1 Answer

0

Try adding a user agent, as suggested in [Why does headless need to be false for Puppeteer to work?](https://stackoverflow.com/a/70936552/6243352):

const puppeteer = require("puppeteer"); // ^19.6.3

// Address of the page to scrape; replace the placeholder before running.
const url = "<Your URL>";

// Declared outside the async function so the cleanup step can see it
// no matter where the run fails.
let browser;

(async () => {
  try {
    browser = await puppeteer.launch();

    // Reuse the first page the browser already has open.
    const openPages = await browser.pages();
    const page = openPages[0];

    // Set an explicit user agent, the workaround this answer proposes
    // for the selector timeout.
    await page.setUserAgent(
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    );
    await page.goto(url, {waitUntil: "domcontentloaded"});

    const sel = ".booking-start-time-label";
    await page.waitForSelector(sel);

    // Collect the text of every matching element, not just the first.
    const times = await page.$$eval(sel, nodes =>
      nodes.map(node => node.textContent)
    );
    console.log(times);
  } catch (err) {
    console.error(err);
  } finally {
    // `browser` is still undefined if launch itself failed.
    await browser?.close();
  }
})();
ggorlen
  • 44,755
  • 7
  • 76
  • 106