0

I am trying to scrape the gig titles, but I only get the first 7–8 titles where there should be about 48. While investigating, I noticed that I am not getting all of the child elements of the gig container; only the first 8 are returned. After checking the website, I could see that when the page opens it first returns 8 gig containers, and then about a second later the rest of the gig containers load.

If that's the issue, how can I get all of the child elements?

Here is the code:

const express = require('express');
const axios = require('axios');
const cheerio = require('cheerio');
// Express application instance; the /home route is registered on it and it is started with app.listen().
const app = express();

/**
 * Downloads a page and collects the text of the gig title elements in it.
 *
 * Only the HTML body of the HTTP response is parsed. Cheerio does not execute
 * scripts, so elements that the site inserts client-side after the initial
 * load are not present in the parsed document and cannot be matched here.
 *
 * @param {string} proURL - URL of the page to fetch.
 * @returns {Promise<string[]>} Text of every matched title element, in
 *     traversal order. Resolves to an empty array when the request or the
 *     parsing fails; the error is logged and not rethrown.
 */
async function getGigObjects(proURL) {
    try {
        const { data } = await axios({
            method: "GET",
            url: proURL,
            headers: {
                // Desktop browser user-agent string (presumably so the site
                // serves the same markup it serves to a browser — confirm).
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38"
            }
        });

        const $ = cheerio.load(data);
        // Selector for the gig containers inside the page's app root.
        const queryDa = '#perseus-app > div > div > div.layout-row.content-row > div > div > div > div';

        const gigContainerData = [];
        $(queryDa).each((parentIdx, parentElem) => {
            // Walk down from each container to the children of its
            // `.text-display-7` element, which hold the title text.
            $(parentElem)
                .children()
                .children()
                .children('.text-display-7')
                .children()
                .each((childIdx, childElem) => {
                    // Collect the title so it is actually returned to the caller.
                    gigContainerData.push($(childElem).text());
                });
        });
        return gigContainerData;
    } catch (err) {
        console.error(err);
        // Keep the return type stable so callers can always serialize/iterate it.
        return [];
    }
}


/**
 * GET /home — scrapes the Fiverr WordPress-services category page via
 * getGigObjects() and responds with the result serialized as JSON.
 *
 * Always sends a syntactically valid JSON body: an array on success
 * (empty when nothing usable was scraped) or an error object with
 * status 500 if the scraper call rejects.
 */
app.get('/home', async function (req, res) {
    let getGigData;
    try {
        getGigData = await getGigObjects('https://www.fiverr.com/categories/programming-tech/wordpress-services');
    } catch (err) {
        // In Express 4 a rejection inside an async handler is not forwarded
        // to the error middleware, so answer explicitly instead of hanging.
        console.error(err);
        res.writeHead(500, {
            'Content-Type': 'application/json',
        });
        res.end(JSON.stringify({ error: 'Failed to fetch gig data' }));
        return;
    }

    console.log('Inside Home Login');
    res.writeHead(200, {
        'Content-Type': 'application/json',
    });
    // JSON.stringify(undefined) yields undefined, which would send an empty
    // (non-JSON) body; fall back to an empty array in that case.
    res.end(JSON.stringify(Array.isArray(getGigData) ? getGigData : []));
});


// Start the HTTP server on port 3001; the callback runs once it is listening.
app.listen(3001, function onListening() {
    console.log('Server Listening on port 3001');
});

mapa0402
  • 454
  • 1
  • 8
  • 24
AnikBhaya
  • 1
  • 1
  • Does this answer your question? [How can I scrape pages with dynamic content using node.js?](https://stackoverflow.com/questions/28739098/how-can-i-scrape-pages-with-dynamic-content-using-node-js) – ggorlen Oct 13 '21 at 21:40
  • Yes, the issue might be something like that. I tried those methods (using Puppeteer/PhantomJS), but the request gets blocked by the website, so I am not getting any data now. – AnikBhaya Oct 14 '21 at 06:06
  • @ggorlen Thanks man, I just solved the issue by using https header on puppeteer. – AnikBhaya Oct 14 '21 at 09:39
  • Awesome, feel free to post a [self answer](https://stackoverflow.com/help/self-answer) – ggorlen Oct 14 '21 at 13:33

0 Answers0