0

I'm trying to scrape little pieces of data from a webpage, but it is taking soo long to scrape... any reason why this is happening ? Initially, it stops scraping, then i had to set the default timeout to 0. This same code was working perfectly fine earlier..

Now it's taking forever to scrape the data i need code below

const puppeteer = require("puppeteer");
const express = require("express");
//const ejs = require("ejs");
const port = 5000;

const app = express();
app.set('view engine', 'ejs');
app.use(express.static(__dirname + '/public', {
     types: {
          'js': "text/javascript"
     }
}));

var usdBuy;
var usdSell;

var gbpBuy;
var gbpSell;

var eurBuy;
var eurSell;


app.get('/', function(req, res) {

     async function start() {
          const browser = await puppeteer.launch();
          const page = await browser.newPage();
          await page.setDefaultNavigationTimeout(0); 
          await page.goto("URL");
          const prices = await page.evaluate(() => {
               return Array.from(document.querySelectorAll(".overlay-text")).map(x => x.textContent)
          })
     
          usdBuy = prices[0]
          usdSell = prices[1]
     
          gbpBuy = prices[2]
          gbpSell = prices[3]
     
          eurBuy = prices[4]
          eurSell = prices[5]
     
          console.log(usdBuy);
          console.log(usdSell);
     
          console.log(gbpBuy);
          console.log(gbpSell);
     
          console.log(eurBuy);
          console.log(eurSell);
          await page.close();
          await browser.close();
     }    
     start();
     res.render("home", {
          usdBuy: usdBuy,
          usdSell: usdSell,
          gbpBuy: gbpBuy,
          gbpSell: gbpSell,
          eurBuy: eurBuy,
          eurSell: eurSell

     });
})


app.listen(process.env.PORT || port,  function () {
     console.log(`Listening on port ${port}`)
})
Loki
  • 1
  • I assume the string URL is just something you replaced for your question. Any chance you’re being rare limited by whoever you’re scraping? Did you add logging to the code to see which part is taking a long time? – Adam Jenkins Jan 11 '23 at 22:31
  • yea i have added comments and still same thing – Loki Jan 11 '23 at 23:12
  • `await page.setDefaultNavigationTimeout(0);` is a bad idea. This says "if something doesn't work, hang the script forever until I kill it, and don't tell me what's wrong". You're probably getting blocked, and so you _should_ stop the script if nav takes more than a few minutes and print some diagnostic information with `page.content()` so you can understand what's going wrong. Without the page URL, there's no way to concretely help beyond that, though. – ggorlen Jan 11 '23 at 23:53
  • 1
    Also, `start()` was never `await`ed, so this doesn't work anyway. The front end will not receive the scraped results because the [response is sent before Puppeteer even starts](https://stackoverflow.com/questions/14220321/how-do-i-return-the-response-from-an-asynchronous-call). "This same code was working perfectly fine earlier.." seems impossible, unless you were just referring to the `console.log`s inside the scraping function. – ggorlen Jan 11 '23 at 23:54

0 Answers0