0

I'm looking for a way (or a regex pattern) to extract the key-value pairs of attributes and their values from a given string, e.g.:

// Illustrative pseudo-code only: the double quotes nested inside the string
// literal below would have to be escaped (or the literal delimited with single
// quotes) to be valid JavaScript, and '...' is a placeholder for the pattern
// being asked about.
var text = "<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>";

var reg = '...';
var res = reg.exec(text);

Result: { 'src': 'a', 'integrity': 'b', 'crossorigin': 'c', 'exclude': 'd' }

Please note important things:

  1. This has nothing to do with the DOM.
  2. The 'text' parameter is pure text, without any connection to the DOM. (I get it from the server.)
  3. This line of text is not supposed to be inserted into the DOM at all.
Jacob
  • 3,598
  • 4
  • 35
  • 56
  • 1
    You are looking for an HTML parser, not regex. – Tomalak Mar 08 '17 at 15:25
  • You want to do that in the browser or in node? – baao Mar 08 '17 at 15:26
  • @Tomalak The object doesn't suppose to be inserted to the DOM. Therefore, I can't use codes like: document.attributes etc... – Jacob Mar 08 '17 at 15:27
  • @baao In the browser. Thank you. – Jacob Mar 08 '17 at 15:28
  • 2
    Using a DOM parser and inserting something into the DOM of the current page are two different things. – CBroe Mar 08 '17 at 15:31
  • `([\w-_]+)="(.*?)""`... But please don't use regex for this problem. – vallentin Mar 08 '17 at 15:35
  • @CBroe There is nothing to do with the DOM. Please see my edit. – Jacob Mar 08 '17 at 15:38
  • That is not the point. See [Using regular expressions to parse HTML: why not?](http://stackoverflow.com/q/590747/1427878) for an explanation why you should in general not try to parse HTML with regex. Ok, granted, in this particular case it might be rather trivial (although you did not manage to present even the slightest attempt so far.) – CBroe Mar 08 '17 at 15:41

3 Answers

2

All you need to do is create an html element, set your string as its innerHTML, and use standard DOM methods on it - nothing is added to the DOM at any time:

// Host the markup in a freshly created <html> element; this snippet never
// attaches it to the page.
var el = document.createElement('html');
el.innerHTML = '<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>';

// Look up the <script> elements first, then log the attributes of the first one.
var scripts = el.getElementsByTagName('script');
console.log(scripts[0].attributes);

To get the attribute names and values, use the following (note that the code below uses ES6):

// Host the markup in a freshly created <html> element; this snippet never
// attaches it to the page.
var el = document.createElement('html');
el.innerHTML = '<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>';

// Copy the first <script>'s attribute nodes into a plain array.
var attrs = Array.from(el.getElementsByTagName('script')[0].attributes);

// Print every attribute as name=value.
attrs.forEach(({ name, value }) => {
  console.log(`${name}=${value}`);
});

ES5 equivalent would be:

// ES5-only walk over the first <script>'s attributes, printing "name = value".
// Relies on `el` having been created and populated beforehand.
(function (attributes) {
    for (var i = 0; i < attributes.length; i += 1) {
        console.log(attributes[i].name + " = " + attributes[i].value);
    }
}(el.getElementsByTagName('script')[0].attributes));

To get the object you asked for, you can use the following:

// Host the markup in a freshly created <html> element; this snippet never
// attaches it to the page.
var el = document.createElement('html');
el.innerHTML = '<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>';

// Collect the first <script>'s attributes into a plain { name: value } object.
var obj = {};
Array.from(el.getElementsByTagName('script')[0].attributes).forEach(({ name, value }) => {
  obj[name] = value;
});

console.log(obj);
baao
  • 71,625
  • 17
  • 143
  • 203
1

To achieve your result we can first extract the attributes with a regex:

/**
 * Extract double-quoted attributes (name="value") from a piece of markup text.
 *
 * Only attributes written as name="value" with no whitespace around the `=`
 * are recognised; escaped quotes inside a value are not supported.
 *
 * @param {string} markup - Raw text such as '<script src="a" integrity="b">'.
 * @returns {Object<string, string>} Attribute names mapped to their unquoted
 *     values; an empty object when no attribute is found.
 */
function parseAttributes(markup) {
    // `match` returns null (not an empty array) when nothing matches.
    // `[\w-]+` keeps hyphenated names such as data-id in one piece.
    var attributes = markup.match(/\b([\w-]+)="(.*?)"/g) || [];

    var parsed = {};
    attributes.forEach(attr => {
        // Cut at the FIRST '=' only: values may themselves contain '='
        // (base64 integrity hashes, query strings, ...).
        var separator = attr.indexOf('=');

        var htmlAttribute = attr.slice(0, separator);
        // Every match looks like name="value", so skip the opening quote
        // right after '=' and drop the closing quote at the end.
        var htmlValue = attr.slice(separator + 2, -1);

        parsed[htmlAttribute] = htmlValue;
    });

    return parsed;
}

var text = '<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>';

var result = parseAttributes(text);

console.log(result); // Your object with key:value
Jacob
  • 3,598
  • 4
  • 35
  • 56
FrenchMajesty
  • 1,101
  • 2
  • 14
  • 29
  • `\b(\w+)="(.*?)"` won't work if you have escaped quotes \" inside the attribute value for whatever reason – Harry B Apr 07 '22 at 08:59
1

@FrenchMajesty No need to do the splitting/removal, you can do it with raw regex as well:

// remember the 'g' flag, otherwise ...
var r = /\b(\w+)\s*=\s*"(.*?)"/g;

var s = '<script src="a" integrity="b" crossorigin="c" exclude="d"><//script>';
var d = {};

// ...  this loop will run indefinitely!
var m = r.exec(s);
while (m) {
    d[m[1]] = m[2];
    m = r.exec(s);
}

d; // { src: "a", integrity: "b", crossorigin: "c", exclude: "d" }
Nils Lindemann
  • 1,146
  • 1
  • 16
  • 26