Puppeteer was designed primarily for running end-to-end (e2e) tests. It can also be used for web scraping and other tasks, but testing was its original purpose. Think about how you use Chrome yourself: you cannot start three downloads at exactly the same moment. You have to open one tab,
press "download", then switch to the next tab and press "download" there, and so on.
Once a download has started, it keeps running even after you switch to another tab.
Your script has to work the same way: start the downloads one after the other; once a download has been started, the transfer itself continues in the background.
For example:
const puppeteer = require('puppeteer');
const path = require('path');
/**
 * Open a new tab in an already-running browser, load the test page and click
 * the i-th `.pdf` element so that its download starts.
 *
 * The returned promise settles once the click has been dispatched; it does not
 * wait for the file to finish downloading, and it does not close the tab.
 *
 * @param {number} i - Zero-based index of the `.pdf` element to click; it is
 *   also used as the name of the download sub-directory.
 * @param {object} browser - Browser object whose `newPage()` creates the tab.
 * @returns {Promise<void>}
 * @throws {RangeError} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i, browser) {
  const page = await browser.newPage();
  await page.goto('https://vinay-jtc.github.io/test-pdf', {
    waitUntil: 'networkidle2',
  });

  const billsData = await page.$$('.pdf');
  const bill = billsData[i];
  if (bill === undefined) {
    // Fail with a clear message instead of "cannot read properties of undefined".
    throw new RangeError(
      `No .pdf element at index ${i} (page has ${billsData.length}).`,
    );
  }

  const downloadPath = path.resolve(`/home/vanni/download/${i}`);
  // `page._client` is an underscore-prefixed internal member, not public API;
  // open a DevTools-protocol session through the public target API instead.
  const client = await page.target().createCDPSession();
  await client.send('Page.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath,
  });

  await bill.click();
}
// Ideally you would watch the filesystem so that you can return at the moment
// the file has finished downloading. You can achieve that with the `chokidar` module, for example.
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Settles once the timer has fired.
 */
async function waitForDownload(ms) {
  const delay = new Promise((done) => {
    setTimeout(done, ms);
  });
  await delay;
}
/**
 * Launch one browser, start four downloads one after the other, give each of
 * them a fixed five-second window to finish, then close the browser.
 *
 * The browser is closed even when starting a download fails, and the returned
 * promise settles only after the browser has actually been closed.
 *
 * @returns {Promise<void>}
 */
async function simplefileDownload() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const pending = [];
    for (let i = 0; i < 4; i++) {
      // Starting a download has to be awaited before the next one is started...
      await startDownload(i, browser);
      // ...but once it has started, its wait timer runs alongside the others.
      pending.push(waitForDownload(5 * 1000));
    }
    await Promise.all(pending);
  } finally {
    // Awaited so that a failure to close is reported rather than left floating.
    await browser.close();
  }
}
// Report a failed run explicitly instead of leaving the promise unhandled.
simplefileDownload().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
If you want the downloads to run completely in parallel, you need to start multiple Puppeteer
instances, like so:
const puppeteer = require('puppeteer');
const path = require('path');
/**
 * Launch a dedicated browser, load the test page, click the i-th `.pdf`
 * element to start its download, wait five seconds and close the browser.
 *
 * The browser is closed even when one of the steps fails, so a failed download
 * does not leave a browser process behind.
 *
 * @param {number} i - Zero-based index of the `.pdf` element to click; it is
 *   also used as the name of the download sub-directory.
 * @returns {Promise<void>}
 * @throws {RangeError} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i) {
  // Run a new Puppeteer instance for each download.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://vinay-jtc.github.io/test-pdf', {
      waitUntil: 'networkidle2',
    });

    const billsData = await page.$$('.pdf');
    const bill = billsData[i];
    if (bill === undefined) {
      // Fail with a clear message instead of "cannot read properties of undefined".
      throw new RangeError(
        `No .pdf element at index ${i} (page has ${billsData.length}).`,
      );
    }

    const downloadPath = path.resolve(`/home/vanni/download/${i}`);
    // `page._client` is an underscore-prefixed internal member, not public API;
    // open a DevTools-protocol session through the public target API instead.
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath,
    });

    await bill.click();
    await waitForDownload(5 * 1000);
  } finally {
    await browser.close();
  }
}
// Ideally you would watch the filesystem so that you can return at the moment
// the file has finished downloading. You can achieve that with the `chokidar` module, for example.
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Settles once the timer has fired.
 */
async function waitForDownload(ms) {
  const delay = new Promise((done) => {
    setTimeout(done, ms);
  });
  await delay;
}
/**
 * Kick off four downloads at once (indices 0 to 3), wait until every one of
 * them has settled successfully, then log `done.`.
 *
 * @returns {Promise<void>}
 */
async function simplefileDownload() {
  // Each call is started immediately, so all four downloads run side by side.
  const downloads = Array.from({ length: 4 }, (_, index) => startDownload(index));
  await Promise.all(downloads);
  console.log('done.');
}
// Report a failed run explicitly instead of leaving the promise unhandled.
simplefileDownload().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});