0

I'm working on a small parser that reads an rss.xml file and extracts information from the links that I find inside the XML items.

I'm able to get all the information I am looking for. My only remaining problem is that I don't really understand how async/await works.

const Parser = require('rss-parser');
const cheerio = require('cheerio')
const request = require('request');

// Configure rss-parser so that each item's <media:thumbnail> element is
// exposed as `item.preview` (read further down as `item.preview.$.url`).
const parser = new Parser(
    { 
        customFields: {
            item: [
                ['media:thumbnail', 'preview']
            ]
        }
    }
);
// Address of the RSS feed to read.
const URL = 'https://www.dimensions.guide/element/rss.xml';
// Collects one record per item whose page was fetched successfully.
const updatedItems = [];
// Module-level variable that is reassigned with each newly built record.
var updatedItem = {};

// Entry point: parse the feed, request every item's link, then log the results.
(async () => {
    const feed = await parser.parseURL(URL);

    // NOTE: forEach returns undefined, so this `await` does not wait for the
    // request callbacks below; they fire after the console.log at the bottom.
    await feed.items.forEach(item => {
        request(item.link, (err, res, html) => {
            // Only scrape pages fetched without an error and with HTTP status 200.
            if(!err && res.statusCode == 200){
                const $ = cheerio.load(html);

                // Combine values scraped from the page with fields taken from the feed item.
                updatedItem = {
                    "title": $('h1.title-text-hover-tag').text(),
                    "category": $('.header-wrapper-lower-links > a.parent-link').first().text(),
                    "subCategory": $('.header-wrapper-lower-links > a.parent-link').last().text(),
                    "link": item.link,
                    "preview": item.preview.$.url,
                    "lastUpdate": item.pubDate,
                }
                updatedItems.push(updatedItem)
            }
        });
    });
    // Runs before any request callback has pushed a record (see NOTE above).
    console.log(updatedItems)
})();

This is my code.

  1. First, I'm getting the information from my xml file.
  2. I'm parsing each item from the rss feed, getting the link to scrape the information I need.
  3. When I have all the information I need, I'm pushing the item to my array.
  4. Only when I'm done checking EVERY link, I want to log the updatedItems I found.

Right now, it's showing an empty array before doing anything else. Any idea?

I'm open to any improvement to my code. I've only just started coding as a hobby and don't know much yet.

Spy474
  • 23
  • 3

2 Answers

0

You're trying to await the return value of feed.items.forEach(..), which is undefined. To fix that, you can use .map(..) instead of .forEach(..) to create an array of promises that you can wait for. Here is an example:

// `request` is callback-based and does not return a promise, so each call is
// wrapped in a promise that settles once its callback has run. Promise.all
// then waits for every one of them.
await Promise.all(feed.items.map(item => {
  return new Promise((resolve, reject) => {
    request(item.link, (err, res, html) => {
      if (err) {
        reject(err);
        return;
      }
      // ... process `res` / `html` here, as in the original callback ...
      resolve();
    });
  });
}));
Titus
  • 22,031
  • 1
  • 23
  • 33
0

I believe your code should look like the code below. Hope it works!

const Parser = require('rss-parser');
    const cheerio = require('cheerio')
    const request = require('request');

    // Options for rss-parser: expose each item's <media:thumbnail> element
    // under the `preview` key of the parsed item.
    const parserOptions = {
        customFields: {
            item: [['media:thumbnail', 'preview']],
        },
    };
    const parser = new Parser(parserOptions);

    // Address of the RSS feed to process.
    const URL = 'https://www.dimensions.guide/element/rss.xml';

    // Accumulates the records built from the feed items.
    const updatedItems = [];
    // Module-level slot for a single scraped record.
    let updatedItem = {};

    /**
     * Promise wrapper around the callback-based `request` call.
     *
     * @param {string} url - Address to fetch.
     * @returns {Promise<Array>} Resolves with `[error, res, body]` when the
     *   request completes without an error and with HTTP status 200 (`error`
     *   is therefore always falsy in the resolved tuple). Rejects with an
     *   Error in every other case.
     */
    function doRequest(url) {
      return new Promise(function (resolve, reject) {
        request(url, function (error, res, body) {
          if (error) {
            reject(error);
            return;
          }
          if (res.statusCode !== 200) {
            // `error` is null on this path; reject with a real Error so that
            // callers never receive a `null` rejection reason.
            reject(new Error(`Request to ${url} failed with status ${res.statusCode}`));
            return;
          }
          // Tuple shape kept so existing callers can keep destructuring it.
          resolve([error, res, body]);
        });
      });
    }

    // Entry point: parse the feed, scrape every item's page, and log the
    // collected records only after all pages have been processed.
    (async () => {
        const feed = await parser.parseURL(URL);

        // forEach ignores the promise returned by an async callback, so the
        // log below would run before any page was fetched. Mapping each item
        // to a promise and awaiting Promise.all waits for all of them, and
        // the result array keeps the feed's item order.
        const scraped = await Promise.all(feed.items.map(async (item) => {
            try {
                // doRequest only resolves for an error-free HTTP 200 response,
                // so no further status check is needed here.
                const [, , html] = await doRequest(item.link);
                const $ = cheerio.load(html);
                const parentLinks = $('.header-wrapper-lower-links > a.parent-link');

                return {
                    "title": $('h1.title-text-hover-tag').text(),
                    "category": parentLinks.first().text(),
                    "subCategory": parentLinks.last().text(),
                    "link": item.link,
                    "preview": item.preview.$.url,
                    "lastUpdate": item.pubDate,
                };
            } catch (err) {
                // One failing item must not discard the others: report it and skip it.
                console.error(`Skipping ${item.link}:`, err);
                return null;
            }
        }));

        updatedItems.push(...scraped.filter((entry) => entry !== null));
        console.log(updatedItems)
    })().catch((err) => {
        // Reached when the feed itself cannot be fetched or parsed.
        console.error('Failed to process the feed:', err);
        process.exitCode = 1;
    });
Danh Nguyen
  • 308
  • 3
  • 9