0

I am using cheerio to scrape job postings from indeed(at least 50 job postings).

I have already got the scraped data by using cheerio. But I don't know how to store these data in an object with async/await method

tools: node.js, cheerio, request.

const cheerio = require('cheerio');
const request = require('request');

const fetchIndeedData = async () => {
    let start = 0;
    let title = [];
    let company = [];
    let location = [];
    let summary = [];

    for (let i = 0; i < 5; i++) {
        let url = `https://ca.indeed.com/jobs?q=full+stack+developer&l=Toronto,+ON&start=${start}`;
        await request(url, (err, response, body) => {
            const $ = cheerio.load(body);

            $('#resultsCol .jobsearch-SerpJobCard .title a').each((i, item) => {
                title.push(item.attribs.title);
            });

            $('#resultsCol .jobsearch-SerpJobCard .company a').each((i, item) => {
                company.push($(item).text().trim());
            });

            $('#resultsCol .jobsearch-SerpJobCard .location').each((i, item) => {
                location.push($(item).text());
            });

            $('#resultsCol .jobsearch-SerpJobCard .summary ').each((i, item) => {
                summary.push($(item).text());
            });

        });
        start += 10;
    }

    const jobPostings = {
        title,
        location,
        company,
        summary
    };

    return jobPostings;
};

const getData = fetchIndeedData().then(data => console.log(data));

I can't get any data when I call getData function. And when I ran console.log(jobPostings) before return. I still can't get data.... Anyone has idea?

Harrymissu
  • 450
  • 2
  • 8
  • 18

1 Answers1

3

Here's what I think it might look like with async / await (you will need request-promise or similar):

const fetchIndeedData = async () => {
  let urls = [0,1,2,3,4].map(start => `https://ca.indeed.com/jobs?q=full+stack+developer&l=Toronto,+ON&start=${start}`)
  let responses = await Promise.all(urls.map(url => request.get(url)))
  let $$ = responses.map(response => cheerio.load(response))
  return $$.map($ => {
    return {
      title: $('title').text(),
      // more cheerio code here
    }
  })
}

;(async function(){
  const data = await fetchIndeedData()
  console.log(data)
})()
Patrick Roberts
  • 49,224
  • 10
  • 102
  • 153
pguardiario
  • 53,827
  • 19
  • 119
  • 159