0

I have been trying to use Puppeteer to log in to a website and scrape its data, but I keep getting `Error: Navigation Timeout Exceeded: 30000ms exceeded` at the `page.waitForNavigation` step, and I am stuck. Does this mean I am not passing the username, password, and submit button properly? Could anyone give me some insight or suggest other things to try?

const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
const { parse } = require('json2csv');
const { save } = require('save-file')

// --- Target URLs ---------------------------------------------------------
const loginUrl = 'https://albertacannabis.org/login';
// Listing page for the "Dried Flower" format; a `&pg=N` suffix is appended per page.
const url = 'https://albertacannabis.org/shop/Cannabis%3daglc_cannabis-cannabis?f=format%3dDried%20Flower';
// Base URL that scraped product hrefs are appended to.
const strainUrl = 'https://albertacannabis.org/';

// --- Mutable scraping state (shared by the functions below) --------------
let strainLinks = [];   // product hrefs collected from the listing pages
let currentPage = 1;    // next listing page number to request
let hasResult = true;   // set to false once a listing page comes back short
// Kept as `let` because the code that fills it is not visible here.
let strainData = [];

// Matches a percentage range such as "18.5 - 24.0%".
// The dots are escaped so that only a literal decimal point is accepted;
// unescaped, `.` would match any character (e.g. "12x5 - 18y0%").
const regex = /\d+\.\d+ - \d+\.\d+%/m;

// Today's date as "YYYY-M-D" (month and day are NOT zero-padded).
const d = new Date();
const date = `${d.getFullYear()}-${d.getMonth() + 1}-${d.getDate()}`;

// --- Login credentials (placeholders) ------------------------------------
const username = '###';
const pass = '###';

// --- Page selectors ------------------------------------------------------
const USERNAME_SELECTOR = '#UserName';
const PASSWORD_SELECTOR = '#Password';
const CTA_SELECTOR = '#SignInButton';
const navSelector = '#menubutton1-1';
const driedCanna = '#menu1-1 > li:nth-child(3) > a';

// Entry point. The returned promise is handled explicitly so that a failure
// anywhere in the pipeline (e.g. a navigation timeout) is reported and yields
// a non-zero exit code instead of an unhandled promise rejection.
runProgram().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

/**
 * Runs the whole scrape: collects the product links, visits each product
 * page one at a time, then exports the accumulated `strainData`.
 *
 * @returns {Promise<void>} resolves once the export call has completed;
 *   rejects with the first error raised by any step.
 */
async function runProgram() {
    await getLinks();

    // Pages are fetched sequentially (one `await` per link), as before.
    for (const link of strainLinks) {
        await getStrainData(`${strainUrl}${link}`);
    }

    // `exportResults` is defined outside this view. Awaiting is harmless if it
    // is synchronous, and if it returns a promise this surfaces its failure.
    await exportResults(strainData);
}

/**
 * Logs in to the site, then walks the paginated listing at `url` and appends
 * every product link found to the module-level `strainLinks` array.
 *
 * Side effects: mutates `strainLinks`, advances `currentPage`, and sets
 * `hasResult` to false when a listing page returns fewer links than a full
 * page. The browser it launches is always closed before returning.
 *
 * @returns {Promise<void>} resolves only after all listing pages have been
 *   read; rejects if navigation or any page interaction fails.
 */
async function getLinks() {
    // A listing page with fewer links than this is treated as the last one
    // (presumably the site's page size — confirm against the live listing).
    const PAGE_SIZE = 12;

    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1366, height: 768 });

        // Fill in and submit the login form.
        await page.goto(loginUrl);
        await page.click(USERNAME_SELECTOR);
        await page.keyboard.type(username);
        await page.click(PASSWORD_SELECTOR);
        await page.keyboard.type(pass);

        // Start waiting for the navigation before clicking, and await both,
        // so the next step cannot run against the still-loading login page.
        // NOTE(review): if the login form is protected by a captcha, this
        // navigation never happens and the wait times out; launch with
        // `{ headless: false }` to see what the page is actually showing.
        await Promise.all([
            page.waitForNavigation({ waitUntil: 'networkidle0' }),
            page.click(CTA_SELECTOR),
        ]);

        await page.click(navSelector);
        await page.click(driedCanna);

        while (hasResult === true) {
            const currentUrl = `${url}&pg=${currentPage}`;

            await page.goto(currentUrl);
            const html = await page.content();
            const $ = cheerio.load(html);

            // One entry per matched anchor (kept even if `href` is missing,
            // so the short-page check below counts anchors, as before).
            const newLinks = $('.product-summary .product-info > a')
                .toArray()
                .map((anchor) => $(anchor).attr('href'));

            strainLinks.push(...newLinks);
            currentPage = currentPage + 1;
            console.log(currentPage);

            if (newLinks.length < PAGE_SIZE) {
                hasResult = false;
            }
        }
    } finally {
        // Always release the browser; otherwise the Chromium process leaks
        // and keeps the Node process from exiting.
        await browser.close();
    }
}
Mike
  • 129
  • 1
  • 9
  • 1
    What happens in the UI if you launch puppeteer with `puppeteer.launch({ headless: false })`, so you can see what is actually happening? There may be a validation dialog or something else preventing you from reaching the new page after the login, or the login may not be successful. You need headful mode to debug it. – theDavidBarton Jun 17 '20 at 13:51
  • @theDavidBarton, thank you, i found out there is a captcha, is there a way around that? – Mike Jun 17 '20 at 14:11
  • 2
    BTW, you miss `await` before the `Promise.all`. – vsemozhebuty Jun 17 '20 at 14:38
  • 2
    First, I recommend reading Thomas Dondorf's thoughts on the topic of Puppeteer and captchas here: https://stackoverflow.com/a/55500565/12412595. Then, if you are willing to continue in spite of all the captchas, make sure to apply the change to your Promise.all that vsemozhetbyt suggested. – theDavidBarton Jun 17 '20 at 14:50
  • @theDavidBarton, thank you again, this was what i needed. – Mike Jun 18 '20 at 20:12
  • Does this answer your question? [How to deal with the captcha when doing Web Scraping in Puppeteer?](https://stackoverflow.com/questions/55493536/how-to-deal-with-the-captcha-when-doing-web-scraping-in-puppeteer) – ggorlen Aug 04 '21 at 21:13

0 Answers0