I can see two other ways to achieve what you want: using `page.waitForResponse` and `page.waitForFunction`. Let's see both.
With page.waitForResponse you can do something as simple as this:
// Start the navigation but deliberately do not await it; the empty catch
// discards any rejection from goto so it cannot surface as an unhandled rejection.
page.goto('https://www.google.com/').catch(() => {});
// Resume as soon as a response for this exact URL string is seen.
await page.waitForResponse('https://www.google.com/'); // don't forget to put the final slash
Pretty simple, huh? If you don't like it, try `page.waitForFunction` and wait until the `document` is created:
// Start the navigation but deliberately do not await it; the empty catch
// discards any rejection from goto so it cannot surface as an unhandled rejection.
page.goto('https://www.google.com/').catch(() => {});
// Resume once the callback returns a truthy value, i.e. once `document` exists.
await page.waitForFunction(() => document); // you can use `window` too. It is almost the same
This code will wait until the `document` exists. This happens when the first bit of HTML arrives and the browser starts to create the DOM tree representation of the document.
But be aware that although these two solutions are simple, neither of them waits until the entire HTML page/document is downloaded. If you want that, you should modify the `waitForEvent` function from my other answer so that it accepts the specific URL that you want to be completely downloaded. Example:
/**
 * Resolves once a single, specific network request of `page` has finished.
 *
 * `page.waitForNavigation` and `frame.waitForNavigation` wait at least for the
 * `domcontentloaded` page event; this helper lets the caller continue earlier,
 * as soon as the request whose URL equals `requestUrl` has been fully
 * downloaded, or when `timeout` milliseconds have passed, whichever is first.
 *
 * @param {puppeteer.Page} page page whose `requestfinished` events are observed.
 * @param {string} requestUrl exact URL of the awaited resource (compared with `===`).
 *   A bare path must end with a slash "/", e.g. `https://example.com/`.
 * @param {number} [timeout] maximum wait in milliseconds. When omitted, not a
 *   number, or negative, no timer is armed and the wait is unbounded.
 * @returns {Promise<void>} resolves with no value on completion or timeout; never rejects.
 */
function waitForRequestToFinish(page, requestUrl, timeout) {
  let release;
  const finished = new Promise((resolve) => {
    release = resolve;
  });

  // Detach the listener, cancel any pending timer, then let the caller continue.
  const settle = () => {
    page.removeListener('requestfinished', handleFinished);
    clearTimeout(timerHandle);
    release();
  };

  // Ignore every finished request except the one the caller asked for.
  const handleFinished = (request) => {
    if (request.url() !== requestUrl) {
      return;
    }
    settle();
  };

  page.on('requestfinished', handleFinished);

  // -1 is a placeholder handle; clearing it in `settle` is a harmless no-op.
  let timerHandle = -1;
  if (typeof timeout === 'number' && timeout >= 0) {
    timerHandle = setTimeout(settle, timeout);
  }

  return finished;
}
How to use it:
// Start the navigation but deliberately do not await it; the empty catch
// discards any rejection from goto so it cannot surface as an unhandled rejection.
page.goto('https://www.amazon.com/').catch(() => {});
// Continue once the request for this exact URL has finished, or after 3000 ms at the latest.
await waitForRequestToFinish(page, 'https://www.amazon.com/', 3000);
Complete example showing neat console.logs:
const puppeteer = require('puppeteer');
/**
 * Resolves once a single, specific network request of `page` has finished.
 *
 * `page.waitForNavigation` and `frame.waitForNavigation` wait at least for the
 * `domcontentloaded` page event; this helper lets the caller continue earlier,
 * as soon as the request whose URL equals `requestUrl` has been fully
 * downloaded, or when `timeout` milliseconds have passed, whichever is first.
 *
 * @param {puppeteer.Page} page page whose `requestfinished` events are observed.
 * @param {string} requestUrl exact URL of the awaited resource (compared with `===`).
 *   A bare path must end with a slash "/", e.g. `https://example.com/`.
 * @param {number} [timeout] maximum wait in milliseconds. When omitted, not a
 *   number, or negative, no timer is armed and the wait is unbounded.
 * @returns {Promise<void>} resolves with no value on completion or timeout; never rejects.
 */
function waitForRequestToFinish(page, requestUrl, timeout) {
  let release;
  const finished = new Promise((resolve) => {
    release = resolve;
  });

  // Detach the listener, cancel any pending timer, then let the caller continue.
  const settle = () => {
    page.removeListener('requestfinished', handleFinished);
    clearTimeout(timerHandle);
    release();
  };

  // Ignore every finished request except the one the caller asked for.
  const handleFinished = (request) => {
    if (request.url() !== requestUrl) {
      return;
    }
    settle();
  };

  page.on('requestfinished', handleFinished);

  // -1 is a placeholder handle; clearing it in `settle` is a harmless no-op.
  let timerHandle = -1;
  if (typeof timeout === 'number' && timeout >= 0) {
    timerHandle = setTimeout(settle, timeout);
  }

  return finished;
}
// Demo driver: launches a browser, logs every network event seen through a raw
// CDP session with a timestamp relative to `t0`, and reports how long it took
// until the main document request finished (or the 3000 ms timeout elapsed).
(async () => {
  // Maps CDP request ids to their URLs so later events can print the URL.
  const netMap = new Map();
  const browser = await puppeteer.launch();
  // try/finally guarantees the browser process is shut down even when one of
  // the awaited calls below rejects; otherwise it would be left running.
  try {
    const page = await browser.newPage();
    const cdp = await page.target().createCDPSession();
    await cdp.send('Network.enable');
    await cdp.send('Page.enable');
    const t0 = Date.now();
    cdp.on('Network.requestWillBeSent', ({ requestId, request: { url: requestUrl } }) => {
      netMap.set(requestId, requestUrl);
      console.log(`> ${Date.now() - t0}ms\t requestWillBeSent:\t${requestUrl}`);
    });
    cdp.on('Network.responseReceived', ({ requestId }) => console.log(`< ${Date.now() - t0}ms\t responseReceived:\t${netMap.get(requestId)}`));
    cdp.on('Network.dataReceived', ({ requestId, dataLength }) => console.log(`< ${Date.now() - t0}ms\t dataReceived:\t\t${netMap.get(requestId)} ${dataLength} bytes`));
    cdp.on('Network.loadingFinished', ({ requestId }) => console.log(`. ${Date.now() - t0}ms\t loadingFinished:\t${netMap.get(requestId)}`));
    cdp.on('Network.loadingFailed', ({ requestId }) => console.log(`E ${Date.now() - t0}ms\t loadingFailed:\t${netMap.get(requestId)}`));
    // The magic happens here: the navigation is started without being awaited
    // (its rejection is deliberately discarded), and execution resumes as soon
    // as the main document request has finished.
    page.goto('https://www.amazon.com').catch(() => { });
    await waitForRequestToFinish(page, 'https://www.amazon.com/', 3000);
    console.log(`\nThe page was released after ${Date.now() - t0}ms\n`);
    await page.close();
  } finally {
    await browser.close();
  }
})();
/* OUTPUT EXAMPLE
[... lots of logs removed ...]
> 574ms requestWillBeSent: https://images-na.ssl-images-amazon.com/images/I/71vvXGmdKWL._AC_SY200_.jpg
< 574ms dataReceived: https://www.amazon.com/ 65536 bytes
< 624ms responseReceived: https://images-na.ssl-images-amazon.com/images/G/01/AmazonExports/Fuji/2019/February/Dashboard/computer120x._CB468850970_SY85_.jpg
> 628ms requestWillBeSent: https://images-na.ssl-images-amazon.com/images/I/81Hhc9zh37L._AC_SY200_.jpg
> 629ms requestWillBeSent: https://images-na.ssl-images-amazon.com/images/G/01/personalization/ybh/loading-4x-gray._CB317976265_.gif
< 631ms dataReceived: https://www.amazon.com/ 58150 bytes
. 631ms loadingFinished: https://www.amazon.com/
*/
This code shows lots of requests and responses, but it stops as soon as "https://www.amazon.com/" has been completely downloaded.