You're closing the page in the wrong place. Moving everything into the for loop, as shown below, and increasing the viewport size should solve your problems.
const puppeteer = require("puppeteer");
let browser;
(async () => {
/**
 * Load `lnk` once per configured time frame and collect the forecast title
 * and the main heading shown on each page.
 *
 * A fresh browser is launched for every time frame and is always closed
 * again, even when navigation or a selector lookup fails, so a failure
 * never leaves a Chrome process behind.
 *
 * @param {string} lnk - Base page URL; `?timeFrame=<n>` is appended to it.
 * @returns {Promise<Array<{bankName: string, status: string, lnk: string, url: string}>>}
 *   One entry per time frame, in the order of `timeFrames`.
 * @throws Rejects with the first error raised while launching, navigating
 *   or reading a page (after the browser of that iteration has been closed).
 */
async function getResults(lnk) {
  const results = [];
  const timeFrames = [1, 5, 15];
  // Loop-invariant: last path segment of the link with dashes turned into spaces.
  const label = lnk.replace(/-/g, ' ').split('/').pop();

  for (const frame of timeFrames) {
    const browser = await puppeteer.launch({headless: true});
    try {
      const page = await browser.newPage();
      await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36");
      await page.setViewport({width: 1920, height: 1080});

      // The query value is the time frame multiplied by 60 (presumably minutes -> seconds).
      const url = `${lnk}?timeFrame=${frame * 60}`;
      console.log(url);

      await page.goto(url, {waitUntil: "networkidle2", timeout: 70000});
      await page.waitForSelector('section.forecast-box-graph');
      const status = await page.$eval("section.forecast-box-graph .title", (el) => el.textContent);
      const bankName = await page.$eval("h1.main-title.js-main-title", (el) => el.textContent.trim());

      results.push({
        bankName,
        status,
        lnk: label,
        url,
      });
    } finally {
      // Runs on success and on failure, so the browser process is never leaked.
      await browser.close();
    }
  }
  return results;
}
console.log( await getResults('https://in.investing.com/equities/axis-bank-technical'));
})().catch(err => console.error(err)). finally(() => browser?. close());
How to run on Render (source)
package.json - add the following dependencies
"dotenv": "^16.0.3",
"express": "^4.18.2",
"puppeteer": "^20.1.2"
index.js
const express = require("express");
const { scrape } = require('./scrape');
// Express application exposing the scraper over HTTP.
const app = express();
const PORT = process.env.PORT || 3000;

// GET /scrape?url=<page> — runs the scraper for the given page; `scrape`
// writes the result to `res` itself.
// NOTE(review): `url` is caller-supplied and is opened in a headless browser;
// consider restricting it to an allow-list of hosts before exposing this publicly.
app.get("/scrape", async (req, res) => {
  const { url } = req.query;
  // Reject a missing or repeated (?url=a&url=b -> array) parameter up front
  // instead of scraping the literal string "undefined".
  if (typeof url !== "string" || url === "") {
    res.status(400).send({ error: "Missing required query parameter: url" });
    return;
  }
  try {
    await scrape(url, res);
  } catch (err) {
    // Express 4 does not catch rejections from async handlers; without this
    // the request would hang and the rejection would go unhandled.
    console.error(err);
    if (!res.headersSent) {
      res.status(500).send({ error: err instanceof Error ? err.message : String(err) });
    }
  }
});

// Simple liveness check.
app.get("/", (req, res) => {
  res.send("test is running");
});

app.listen(PORT, () => {
  console.log(`Listening on port ${PORT}`);
});
Dockerfile
# Build on the Puppeteer image, pinned to tag 20.1.2 (the same version listed for the "puppeteer" dependency).
FROM ghcr.io/puppeteer/puppeteer:20.1.2
# PUPPETEER_EXECUTABLE_PATH is the value scrape.js passes as executablePath when NODE_ENV is "production".
# NOTE(review): presumably the base image already provides Chrome at this path, which is why the download is skipped — confirm.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
# All following instructions run relative to this directory.
WORKDIR /usr/src/app
# Copy only the package manifests first, then install dependencies from them.
COPY package*.json ./
RUN npm ci
# Copy the rest of the project (index.js, scrape.js, ...).
COPY . .
# Start the Express server.
CMD [ "node", "index.js" ]
scrape.js - modify the above code a bit
const puppeteer = require('puppeteer');
require("dotenv").config();
// Resource types that are answered with a stub instead of being fetched.
const SKIPPED_RESOURCE_TYPES = /image|imageset|media|stylesheet|font|script/;

/**
 * Scrape `lnk` once per time frame in a single browser and send the collected
 * results on `res`.
 *
 * Each time frame gets its own page. A failure on one page is recorded as an
 * `{url, lnk, error}` entry (with the error message as a string, so it
 * survives JSON serialization) and does not stop the remaining time frames.
 * The browser is always closed, including when launching a page or sending
 * the response throws.
 *
 * @param {string} lnk - Base page URL; `?timeFrame=<n>` is appended to it.
 * @param {object} res - Express-style response; `res.send(results)` is called
 *   exactly once on the normal path, `res.status(400).send(...)` when `lnk`
 *   is not a non-empty string.
 * @returns {Promise<void>}
 */
const scrape = async (lnk,res) => {
  // Without a usable link every iteration would fail while building the
  // label, and no response would ever be sent.
  if (typeof lnk !== 'string' || lnk === '') {
    res.status(400).send({ error: 'A non-empty page URL is required' });
    return;
  }

  // Loop-invariant: last path segment of the link with dashes turned into spaces.
  const label = lnk.replace(/-/g, ' ').split('/').pop();

  const browser = await puppeteer.launch({
    headless: true, // "new" gives error on render
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--single-process",
      "--no-zygote",
    ],
    executablePath:
      process.env.NODE_ENV === 'production'
        ? process.env.PUPPETEER_EXECUTABLE_PATH
        : puppeteer.executablePath(),
  });

  try {
    const results = [];
    const timeFrames = [1, 5, 15];
    for (const frame of timeFrames) {
      const page = await browser.newPage();
      const url = `${lnk}?timeFrame=${frame * 60}`;
      console.log(url);
      try {
        await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36");
        await page.setViewport({width: 1920, height: 1080});

        // Skip loading non-essentials.
        await page.setRequestInterception(true);
        page.on('request', async (req) => {
          // A request that was already resolved must not be touched again;
          // calling continue()/respond() on it throws.
          if (req.isInterceptResolutionHandled()) {
            return;
          }
          try {
            if (SKIPPED_RESOURCE_TYPES.test(req.resourceType())) {
              await req.respond({status: 200, body: 'aborted'});
            } else {
              await req.continue();
            }
          } catch (err) {
            // Runs outside the awaited flow of scrape(); log instead of
            // letting it become an unhandled rejection.
            console.error(err);
          }
        });

        await page.goto(url, {waitUntil: "load", timeout: 7000});
        await page.waitForSelector('section.forecast-box-graph');
        const status = await page.$eval("section.forecast-box-graph .title", (el) => el.textContent);
        const bankName = await page.$eval("h1.main-title.js-main-title", (el) => el.textContent.trim());
        results.push({
          bankName,
          status,
          lnk: label,
          url,
        });
      } catch (err) {
        results.push({
          url,
          lnk: label,
          // An Error object serializes to {} in JSON; send its message instead.
          error: err instanceof Error ? err.message : String(err),
        });
      } finally {
        await page.close();
      }
    }
    res.send(results);
  } finally {
    // Always release the browser, even if newPage() or res.send() threw.
    await browser.close();
  }
};
module.exports = {scrape};
.gitignore
/node_modules
- push everything to a new github repository,
- open Render, create NEW webservice, connect or add your git repository
- Name: anything, Runtime : should be Docker,
- click Advanced, add environment variable
- key: PUPPETEER_SKIP_CHROMIUM_DOWNLOAD, value: true
- key: PUPPETEER_EXECUTABLE_PATH, value: /usr/bin/google-chrome-stable
- click Create Web Service
- wait until setup is complete, and go to the
<URL>
it gives you, you'll see the 'test is running' message.
- next go to
<URL>/scrape?url=https://in.investing.com/equities/axis-bank-technical
and you'll get the result
Note(s) :
waitUntil
can be changed from "load"
to "networkidle0"
or "networkidle2"
, they all work.
- Changing
headless: true
to headless: "new"
as the deprecation warning suggests, gives errors on Render, even though it works fine locally.