0

*I've already searched Stack Overflow and tried the answers to all of the similar questions.*

I'm writing a node script that:

  1. Scans a directory for html files
  2. Reads the file contents into a string
  3. Searches the string for an id or class name

I'm using a regex to find the id or class name. I'm able to get a match when I'm searching for an id, but not when I'm searching for a class name.

Here's my example code:

https://jsbin.com/bayuquzulu/edit?js,console

For simplicity, I put the html into a variable and omitted the file read portion.

// Sample markup used in place of a file read: one <section> wrapping three
// buttons, assembled from its pieces into a single line with no separators.
var html = [
    '<section class="mb-4" id="button-test">',
    '<button type="button" class="btn btn-primary">Primary</button>',
    '<button type="button" class="btn btn-secondary">Secondary</button>',
    '<button type="button" class="btn btn-success">Success</button>',
    '</section>'
].join('');

/**
 * Escapes every regex metacharacter in `text` so it can be embedded in a
 * dynamically built pattern and matched literally.
 *
 * @param {string} text - Raw text to embed.
 * @returns {string} The text with metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Scans an HTML string for opening tags that reference any of the given
 * names as an id, as a data-dna attribute value, or as one token of a class
 * attribute.
 *
 * Matching is regex based and therefore heuristic: each pattern is confined
 * to a single tag (`<` ... `>` with no angle brackets in between), so an
 * attribute value that itself contains `<` or `>` will not be recognised.
 *
 * @param {string} content - HTML source to search; null/undefined is treated
 *     as an empty string.
 * @param {string[]} [props] - Names to look for. Defaults to the two names
 *     the function originally had hard-coded.
 * @returns {Object<string, string[]>} Map from prefixed name ('#name',
 *     'data-comp=name' or '.name') to an array holding the text of the first
 *     tag that matched. Names with no match have no entry.
 */
function scan(content, props = ['btn-primary', 'button-test']) {
    const source = content == null ? '' : String(content);
    const res = {};

    for (const prop of props) {
        // The name is matched literally; without escaping, a name such as
        // "a.b" would also match "axb".
        const name = escapeRegExp(prop);

        const exps = [
            // id / data-dna: the whole attribute value must be the name
            // (surrounding whitespace allowed), inside matching quotes.
            { key: '#', exp: new RegExp(`<[^<>]*\\sid=(["'])\\s*${name}\\s*\\1[^<>]*>`, 'i') },
            // NOTE(review): the result key says "data-comp=" while the attribute
            // searched is "data-dna" - kept as in the original; confirm intent.
            { key: 'data-comp=', exp: new RegExp(`<[^<>]*\\sdata-dna=(["'])\\s*${name}\\s*\\1[^<>]*>`, 'i') },
            // class: the name may sit anywhere in the whitespace-separated
            // list, but must be a complete token ("btn" does not match
            // "btn-primary").
            { key: '.', exp: new RegExp(`<[^<>]*\\sclass=(["'])(?:[^"'<>]*\\s)?${name}(?:\\s[^"'<>]*)?\\1[^<>]*>`, 'i') }
        ];

        for (const item of exps) {
            const match = item.exp.exec(source);
            if (match === null) {
                continue;
            }

            const key = item.key + prop;
            if (!res[key]) {
                res[key] = [];
            }
            res[key].push(match[0]);
        }
    }

    return res;
}

// Run the scan on the sample markup and print the resulting map of matches.
console.log(scan(html));
Cam Tullos
  • 2,537
  • 1
  • 21
  • 18
  • Have a look at http://stackoverflow.com/questions/16559171/regular-expression-to-get-a-class-name-from-html – psycho Feb 10 '17 at 06:01
  • None of the solutions on that topic work for me. – Cam Tullos Feb 10 '17 at 06:28
  • Could you add some details on what exactly is failing? What should the expected output look like, and what does it look like now? At which point is your code failing? What about `(class|id)="(.*?)"`? – nozzleman Feb 10 '17 at 06:40
  • recheck your regexes, they seem pretty wrong and partly too complicated – Fallenhero Feb 10 '17 at 07:18
  • If you accept using libraries, I would suggest using a parser first, for example https://github.com/fb55/htmlparser2 . Once you get the HTML parsed into a tree, it's a simple matter of traversing the tree to find all the class names. – jiyinyiyong Feb 10 '17 at 09:21

1 Answer

3

I think this might be the answer:

// Sample markup to analyse.
const data = `<section class="mb-4" id="button-test">
    <button type="button" class="btn btn-primary">Primary</button>
    <button type="button" class="btn btn-secondary">
        Secondary
    </button>
    <button type="button" class="btn btn-success">
        Success
    </button>
</section>`

/**
 * Reads the value of one attribute from the text of a single opening tag.
 *
 * The attribute name must be preceded by whitespace, so looking up "id" does
 * not pick up "data-id". Both single- and double-quoted values are accepted.
 *
 * @param {string} tag - Text of one opening tag, e.g. '<a id="x">'.
 * @param {string} name - Attribute name; must not contain regex
 *     metacharacters (it is only called with fixed names here).
 * @returns {string} The attribute value, or '' when the attribute is absent.
 */
const readAttribute = (tag, name) => {
  const pattern = new RegExp(`\\s${name}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, 'i')
  const found = tag.match(pattern)
  return found ? found[2] : ''
}

/**
 * Collects the id and the class list of every opening tag in an HTML string.
 *
 * This is a regex heuristic, not a parser: a tag is any '<' followed by a
 * letter and running to the next '>', so closing tags, comments and doctype
 * declarations are skipped and tags may span several lines.
 *
 * @param {string} html - HTML source to scan.
 * @returns {{id: string, class: string[]}[]} One entry per opening tag, in
 *     document order. `id` is '' and `class` is [] when not present.
 */
const parseElements = (html) => {
  // Created per call because a /g regex carries lastIndex state between execs.
  const findTag = /<[a-z][^<>]*>/gi
  const parsed = []
  let found
  while ((found = findTag.exec(html)) !== null) {
    const tag = found[0]
    parsed.push({
      id: readAttribute(tag, 'id'),
      // Split on any whitespace run and drop empties so a missing or padded
      // class attribute yields no empty-string class names.
      class: readAttribute(tag, 'class').split(/\s+/).filter(Boolean),
    })
  }
  return parsed
}

const elements = parseElements(data)

console.log("All elements")
console.log(elements)

// You can now also filter elements
console.log("All elements having btn class")
console.log(elements.filter(element => element.class.indexOf("btn") !== -1))
Maciej Kozieja
  • 1,812
  • 1
  • 13
  • 32