0
// Reads the HTML file at htmlPath, parses it with htmlparser2 and extracts
// two trimmed text snippets from the top-level nodes:
//   primary   - text of the <p> found at children[1].children[1] of a node
//               whose class attribute is exactly 'offer-wrapper'
//   secondary - text of the <p> found at children[1] of a node whose class
//               attribute is exactly 'promo-banner'
// When several top-level nodes match, the last match wins.
fs.readFile(htmlPath, 'utf8', function(err, html) {
    // Without this guard a failed read would hand `undefined` to the parser.
    if (err) {
        console.error(err);
        return;
    }

    var htmlparser = require("htmlparser2");
    // NOTE(review): nothing in this snippet reads these after parsing;
    // presumably the surrounding code consumes them - confirm.
    var primary, secondary;

    // True when the node carries an attribs map whose class equals className.
    function hasClass(node, className) {
        return node.attribs !== undefined && node.attribs.class === className;
    }

    // Returns node.children[index], or undefined when node is missing or has
    // no children array (guards against a TypeError on such nodes).
    function childAt(node, index) {
        if (node === undefined || node.children === undefined) {
            return undefined;
        }
        return node.children[index];
    }

    // Returns the trimmed data of the first child of a <p> node, or undefined
    // when node is not a <p> or its first child carries no data.
    function paragraphText(node) {
        if (node === undefined || node.name !== 'p') {
            return undefined;
        }
        var first = childAt(node, 0);
        if (first === undefined || first.data === undefined) {
            return undefined;
        }
        return first.data.trim();
    }

    var handler = new htmlparser.DomHandler(function(error, dom) {
        // On a parse failure there is no usable dom to walk.
        if (error) {
            console.error(error);
            return;
        }

        for (var i = 0; i < dom.length; i++) {
            var node = dom[i];
            var text;

            // Index 1 is positional: presumably index 0 is the whitespace
            // text node that precedes the nested tag in the expected markup.
            if (hasClass(node, 'offer-wrapper')) {
                text = paragraphText(childAt(childAt(node, 1), 1));
                if (text !== undefined) {
                    primary = text;
                }
            }

            if (hasClass(node, 'promo-banner')) {
                text = paragraphText(childAt(node, 1));
                if (text !== undefined) {
                    secondary = text;
                }
            }
        }
    });

    var parser = new htmlparser.Parser(handler);
    parser.write(html);
    parser.end();
});

The HTML files are in one of the following formats. Format 1:

<div class=offer-wrapper>
  <div class=offer>
    <p>Content1</p>
  </div>
</div>
<div class=promo-banner>
  <p>Content 2</p>
</div>

or Format 2

<div class=promo-banner>
    <p>Content 2</p>
</div>

or Format 3

<div class=offer-wrapper>
  <div class=offer>
    <p>Content1</p>
  </div>
</div>
<div class=promo-banner>
</div>

When I try to read the content of the HTML files, I can only read content from Formats 2 and 3, not from Format 1.

Can someone please help me?

  • Wow, can you unroll your nested ternary operators? This is completely unreadable code; please use regular `if`/`else`. – Tomalak Dec 05 '16 at 19:36
  • 2
    Also, there is an HTML parser called [Cheerio](https://github.com/cheeriojs/cheerio) which is a wrapper around htmlparser2 that also supports jQuery-like syntax for selecting specific nodes. You will find that *much* easier to handle than with the basic DOM methods of htmlparser2. – Tomalak Dec 05 '16 at 19:43

1 Answer

0

As Tomalak suggested, cheerio would solve your problem. I have written a module, using cheerio, that solves it. Since it is a ready-made piece of code, you won't waste your time. You can check it by referring to my answer, where I explain the code.

Hakan Demir
  • 307
  • 2
  • 4
  • 12