I'm trying to create a node app that requires a URL from the user, the URL is then passed to scrape.js
and using puppeteer, scrapes certain fields, and then passes the data back to app.js
in a json format (so that I can then upset it into a doc). But what I receive is the entire ServerResponse and not the data in a json format as I'm intending.
I was hoping someone with more experience could shed some light. Here is what I have so far:
// app.js
const scrape = require('./scrape');
const router = express.Router();
router.get( '/', ( req, res ) => {
const url = req.body.url;
const item = new Promise((resolve, reject) => {
scrape
.scrapeData()
.then((data) => res.json(data))
.catch(err => reject('Scraping failed...'))
})
});
// scrape.js
const puppeteer = require('puppeteer');
const scrapeData = async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.setViewport({ width: 360, height: 640 });
await page.goto(url);
let scrapedData = await page.evaluate(() => {
let scrapedDetails = [];
let elements = document.querySelectorAll('#a-page');
elements.forEach(element => {
let detailsJson = {};
try {
detailsJson.title = element.querySelector('h1#title').innerText;
detailsJson.desc = element.querySelector('#description_box').innerText;
} catch (exception) {}
scrapedDetails.push(detailsJson);
});
return scrapedDetails;
}));
// console.dir(scrapeData) - logs the data successfully.
};
module.exports.scrapeData = scrapeData