0

I have the following code that scrapes a webpage, removes the JavaScript, and saves the result to disk:

const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Opens http://example.com in a Puppeteer-launched browser, removes every
 * <script> element from the loaded document, and writes the resulting HTML
 * to `result.html` (relative to the current working directory).
 *
 * The browser is closed even when navigation, evaluation, or the file write
 * fails, so no browser process is left behind.
 *
 * @returns {Promise<void>} Resolves after the file is written and the browser
 *   is closed; rejects with the underlying error if any step fails.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('http://example.com', {
      waitUntil: 'networkidle2',
    });

    // Fixed 2 s grace period after the network goes idle. A plain timer is
    // used because page.waitFor() is deprecated in Puppeteer and is not
    // available in newer releases.
    await new Promise((resolve) => setTimeout(resolve, 2000));

    await page.evaluate(() => {
      // Query the whole document rather than only <body>, so that scripts
      // declared in <head> are stripped as well.
      for (const script of document.querySelectorAll('script')) {
        script.remove();
      }
    });

    const result = await page.content();
    fs.writeFileSync('result.html', result);
  } finally {
    // Always release the browser, including on failure paths.
    await browser.close();
  }
}

// Handle rejection explicitly instead of leaving the promise floating:
// log the error and signal failure through the process exit code.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

I would also like to convert all images to base64 so that they are embedded directly in the saved HTML. How can I do this with JavaScript and Puppeteer?

Nathan
  • 7,627
  • 11
  • 46
  • 80
  • Does MHTML suffice for your task? If so, see https://stackoverflow.com/questions/54814323/puppeteer-how-to-download-entire-web-page-for-offline-use/54817983#54817983 – vsemozhebuty Jan 18 '21 at 17:53
  • Or you can try some approaches from https://stackoverflow.com/questions/6150289/how-can-i-convert-an-image-into-base64-string-using-javascript – vsemozhebuty Jan 18 '21 at 18:07

0 Answers0