I have created a REST API with Express, and I first need to scrape data with Puppeteer during the build process. However, Vercel fails to scrape the data with Puppeteer during deployment.
I get this error: `Error: Failed to launch the browser process!`
First, I checked these pages: "Error: Failed to launch the browser process puppeteer" and "How to solve Puppeteer: failed to launch the browser process".
All the answers to my issue say to install the missing libraries using `sudo apt-get install`, but I also read this question:
"Installing a package using apt get to serverless node.js app"
It says that we can only install packages with npm during a Vercel deployment.
So it seems impossible to do what I want. (I don't want to keep my scraped data in my GitHub repo; I want to scrape it during the build process.)
I also tried this approach: https://github.com/michaelkitas/Puppeteer-Vercel (with `chrome-aws-lambda` and `puppeteer-core`), but it still doesn't work; I get the same error.
My build command: `node scrap.js && node index.js`
My scraping code:
const fs = require("fs");

// Holds the chrome-aws-lambda module once it is loaded below; otherwise it
// stays an empty object.
let chrome = {};

// Puppeteer entry point: puppeteer-core when AWS_LAMBDA_FUNCTION_VERSION is
// set, the full puppeteer package otherwise.
let puppeteer;

// NOTE(review): presumably AWS_LAMBDA_FUNCTION_VERSION is only present inside
// a running Lambda function and not while a build command executes, in which
// case the `else` branch is the one taken at build time — confirm.
if (process.env.AWS_LAMBDA_FUNCTION_VERSION) {
  chrome = require("chrome-aws-lambda");
  puppeteer = require("puppeteer-core");
} else {
  puppeteer = require("puppeteer");
}
/**
 * Appends `content` to the file at `name` using the asynchronous
 * `fs.appendFile` API.
 *
 * The call returns immediately. If the append fails, the error is rethrown
 * from inside the completion callback, so the caller cannot catch it.
 *
 * @param {string} name - Path of the file to append to.
 * @param {string|Buffer} content - Data to append.
 * @returns {void}
 */
const writeIconFile = (name, content) => {
  const rethrowOnFailure = (error) => {
    if (error) {
      throw error;
    }
  };

  fs.appendFile(name, content, rethrowOnFailure);
};
/**
 * Launches a browser, opens https://www.google.com and returns the page title.
 *
 * When AWS_LAMBDA_FUNCTION_VERSION is set, the browser is launched with the
 * arguments, viewport and executable path supplied by `chrome`; otherwise
 * Puppeteer's default launch options are used.
 *
 * Any error raised while launching, navigating or reading the title is logged
 * and reported to the caller as `null`. The browser is always closed before
 * returning, so no browser process outlives the call.
 *
 * @returns {Promise<string|null>} The page title, or `null` on failure.
 */
const scrap = async () => {
  let options = {};
  if (process.env.AWS_LAMBDA_FUNCTION_VERSION) {
    options = {
      args: [...chrome.args, "--hide-scrollbars", "--disable-web-security"],
      defaultViewport: chrome.defaultViewport,
      executablePath: await chrome.executablePath,
      headless: true,
      ignoreHTTPSErrors: true,
    };
  }

  // Declared outside the `try` so the `finally` clause can reach it.
  let browser;
  try {
    browser = await puppeteer.launch(options);
    const page = await browser.newPage();
    await page.goto("https://www.google.com");
    return await page.title();
  } catch (err) {
    console.error(err);
    return null;
  } finally {
    // `browser` is still undefined when the launch itself failed.
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        // A failed shutdown must not replace the result computed above.
        console.error(closeErr);
      }
    }
  }
};
// Entry point: scrape once and append the result to data.txt.
// scrap() resolves to null when scraping failed; in that case nothing is
// written and the process is marked as failed, so a chained command
// (`node scrap.js && ...`) does not continue without the data.
scrap()
  .then((data) => {
    if (data == null) {
      console.error("Scraping failed; data.txt was not written.");
      process.exitCode = 1;
      return;
    }
    writeIconFile("data.txt", data);
  })
  .catch((err) => {
    // Covers rejections from scrap() itself and synchronous errors thrown
    // while starting the write.
    console.error(err);
    process.exitCode = 1;
  });