1

I want to download multiple files by opening multiple tabs in a loop and downloading one file from each tab into its own download path, but only the file from the last tab is downloaded into its separate download path.

Here is sample code that fails to download multiple files into separate download paths with Puppeteer.

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Opens a new tab on the test page, points that tab's downloads at a
 * per-index directory, clicks the i-th `.pdf` element and then pauses.
 *
 * @param {number} i - Index of the `.pdf` element to click; also the name
 *   of the download sub-directory.
 * @param {object} browser - Browser object used to open the new tab.
 * @returns {Promise<void>} Settles five seconds after the click.
 */
async function download(i, browser) {
  const tab = await browser.newPage();
  await tab.goto('https://vinay-jtc.github.io/test-pdf', { waitUntil: 'networkidle2' });

  const pdfLinks = await tab.$$('.pdf');
  const targetDir = path.resolve(`/home/vanni/download/${i}`);
  const downloadSettings = { behavior: 'allow', downloadPath: targetDir };
  await tab._client.send('Page.setDownloadBehavior', downloadSettings);

  await pdfLinks[i].click();
  // Fixed 5000 ms pause after the click before the returned promise settles.
  await new Promise((wake) => {
    setTimeout(wake, 5000);
  });
}

/**
 * Launches a visible (non-headless) browser, starts four `download` calls
 * without waiting for one another, and closes the browser once all of them
 * have resolved.
 *
 * @returns {Promise<void>}
 */
async function simplefileDownload() {
  const browser = await puppeteer.launch({ headless: false });
  const pending = Array.from({ length: 4 }, (_, tabIndex) => download(tabIndex, browser));
  await Promise.all(pending);
  // The close is started here but not awaited.
  browser.close();
}

// Entry point: start the run. The returned promise is neither awaited nor
// given a rejection handler here.
simplefileDownload();

Can someone help with this issue?

1 Answer

0

Puppeteer was made for running e2e tests. It can be used for web scraping and other things, but testing was its original purpose. If you think about how you use Chrome yourself, you cannot download 3 files completely in parallel: you have to open one tab, press "download", then go to the next tab and press "download", and so on.

The download itself continues even when switching to a new tab.

So in the script you have to do the same: start the downloads one after the other. Once a download has been started, the download itself continues in the background.

For example:

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Opens a new tab, routes that tab's downloads to a per-index directory and
 * clicks the i-th `.pdf` element to start the download.
 *
 * Resolves as soon as the click has been dispatched; it does not wait for
 * the file to finish downloading.
 *
 * @param {number} i - Index of the `.pdf` element to click; also used as the
 *   name of the download sub-directory.
 * @param {object} browser - Puppeteer browser in which the tab is opened.
 * @returns {Promise<void>}
 * @throws {Error} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i, browser) {
  const page = await browser.newPage();
  await page.goto('https://vinay-jtc.github.io/test-pdf', {
    waitUntil: 'networkidle2',
  });

  const billsData = await page.$$('.pdf');
  const bill = billsData[i];
  if (bill === undefined) {
    // Fail with a clear message instead of "cannot read properties of undefined".
    throw new Error(`No .pdf element at index ${i} (found ${billsData.length})`);
  }

  const downloadPath = path.resolve(`/home/vanni/download/${i}`);
  // `page._client` is an underscore-prefixed internal of Puppeteer; go through
  // the public CDP session API so the script does not depend on it.
  const client = await page.target().createCDPSession();
  await client.send('Page.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath,
  });

  await bill.click();
}

/**
 * Pauses for a fixed amount of time as a stand-in for "the download finished".
 *
 * Ideally you would watch the file system instead, so you can return at the
 * moment the file has been downloaded; the `chokidar` module is one way to
 * do that.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function waitForDownload(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Starts four downloads one after another in a single visible browser, gives
 * each of them a fixed five-second window to finish, and then closes the
 * browser.
 *
 * The browser is closed even when a download fails to start, and the
 * returned promise only settles after the browser has shut down.
 *
 * @returns {Promise<void>}
 */
async function simplefileDownload() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const promises = [];
    for (let i = 0; i < 4; i++) {
      // Start the downloads strictly one at a time.
      await startDownload(i, browser);

      // Once started, the download continues in the background, so only the
      // waiting is collected here and the loop moves on to the next tab.
      promises.push(waitForDownload(5 * 1000));
    }
    await Promise.all(promises);
  } finally {
    // Always release the browser process, and wait for the shutdown so the
    // caller does not continue while it is still closing.
    await browser.close();
  }
}

// Entry point. Report a failed run and exit with a non-zero status instead
// of leaving the rejection unhandled.
simplefileDownload().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});


If you want to do it completely in parallel, you need to start multiple Puppeteer instances, like so:

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Downloads the i-th `.pdf` from the test page in its own browser instance.
 *
 * Launches a dedicated visible browser, routes its downloads to a per-index
 * directory, clicks the i-th `.pdf` element, waits a fixed five seconds and
 * closes the browser. The browser is closed even when one of the steps
 * throws.
 *
 * @param {number} i - Index of the `.pdf` element to click; also used as the
 *   name of the download sub-directory.
 * @returns {Promise<void>}
 * @throws {Error} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i) {
  // Run a new Puppeteer instance for each download.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://vinay-jtc.github.io/test-pdf', {
      waitUntil: 'networkidle2',
    });

    const billsData = await page.$$('.pdf');
    const bill = billsData[i];
    if (bill === undefined) {
      // Fail with a clear message instead of "cannot read properties of undefined".
      throw new Error(`No .pdf element at index ${i} (found ${billsData.length})`);
    }

    const downloadPath = path.resolve(`/home/vanni/download/${i}`);
    // `page._client` is an underscore-prefixed internal of Puppeteer; go
    // through the public CDP session API so the script does not depend on it.
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath,
    });

    await bill.click();
    await waitForDownload(5 * 1000);
  } finally {
    // Without this, any failure above would leave a browser process running.
    await browser.close();
  }
}

/**
 * Pauses for a fixed amount of time as a stand-in for "the download finished".
 *
 * Ideally you would watch the file system instead, so you can return at the
 * moment the file has been downloaded; the `chokidar` module is one way to
 * do that.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function waitForDownload(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Kicks off four `startDownload` calls at once, as in the original parallel
 * version, and logs `done.` after every one of them has resolved.
 *
 * @returns {Promise<void>}
 */
async function simplefileDownload() {
  const downloads = Array.from({ length: 4 }, (_, index) => startDownload(index));
  await Promise.all(downloads);
  console.log('done.');
}

// Entry point. Report a failed run and exit with a non-zero status instead
// of leaving the rejection unhandled.
simplefileDownload().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Silvan Bregy
  • 2,544
  • 1
  • 8
  • 21