Javascript to pull attributes from shortcode string

Question

I have a JavaScript application which retrieves shortcode strings from a WordPress database. So I may end up with a variable like this:

var shortcode = '[wp-form id="1946" title="My Test Form"]';

I am looking to use pure JavaScript to access the attributes so I can extract the title, etc. I imagine this will be some form of regex and split(), but so far my efforts have been frustrated by splitting on whitespace, since attribute values can themselves contain spaces.

Any ideas greatly appreciated.

Are the attributes always the same? — Sergey Khalitov, Jun 01 '16 at 19:15

score 6 · Accepted Answer · edited Feb 16 '22 at 17:14

6

Try to use this code:

const shortcode = '[wp-form id="1946" title="My Test Form" empty=""]';

// Collect every name="value" pair in a single pass. Group 1 is the attribute
// name, group 2 the (possibly empty) double-quoted value. matchAll yields an
// empty iterator when nothing matches, so a shortcode without attributes
// produces {} instead of throwing on a null match result.
const attributes = {};
for (const [, name, value] of shortcode.matchAll(/([\w-]+)="(.*?)"/g)) {
    attributes[name] = value;
}
console.log(attributes);

Output:

Object {id: "1946", title: "My Test Form", empty: ''}

edited Feb 16 '22 at 17:14

Obed Parlapiano

3,226
3
21
39

answered Jun 01 '16 at 19:23

Sergey Khalitov

987
7
17

Works great, but it fails when there is an attribute with an empty value, e.g. `[wp-form id="1946" title="My Test Form" category="" tags=""]`. I would appreciate it if you could fix it. – Jignesh Bhavani Apr 04 '20 at 17:43
1

OK, I figured it out. To solve the above issue, `".+?"` needs to be changed to `".*?"`. – Jignesh Bhavani Apr 05 '20 at 20:05
Updated answer to account for comment above – Obed Parlapiano Feb 16 '22 at 17:14

score 3 · Answer 2 · edited May 23 '17 at 10:28

Okay, even though I'm late to the party I'm going to throw an answer in. I'm surprised nobody complained "you can't parse with just a regular expression!" I guess this used to be a much more fashionable comment to make. Anyway, I think it's perfectly reasonable to use just a regex, and I see some reasonable attempts already given.

However, if you want to really parse the tag, here's a quick parser I whipped up.

/**
 * Parses a shortcode tag such as `[wp-form id="1946" title="My Test Form"]`
 * into its name and attributes.
 *
 * The input is split into tokens (whitespace runs, single non-word
 * characters, and word runs) which drive a small state machine.
 *
 * Attribute forms handled:
 *   - name="value" / name='value'; inside quotes a backslash escapes the
 *     following quote or backslash, any other escaped token keeps its backslash
 *   - name=value (unquoted; only the single token after `=` is taken)
 *   - name (no value; stored as '')
 *
 * Text before the first `[` and after the closing `]` is ignored.
 *
 * @param {string} shortCode - Text containing the shortcode tag.
 * @returns {{name: string, attributes: Object}} The tag name and a map of
 *   attribute names to string values.
 * @throws {string} A message string (not an Error object) when `=` is found
 *   with no attribute name in front of it.
 */
function parseShortCode(shortCode) {
  const tokenPattern = /(\s+|\W)|(\w+)/g;
  const parsedValue = {
    name: '',
    attributes: {}
  };
  const attributes = parsedValue.attributes;
  let mode = 'NOT STARTED';
  let curAttribute = '';
  let quoteChar;
  let match;

  while ((match = tokenPattern.exec(shortCode)) !== null) {
    const token = match[0];
    const isSpace = /\s/.test(token);

    switch (mode) {
      case 'NOT STARTED':
        if (token === '[') {
          mode = 'GETNAME';
        }
        break;

      case 'GETNAME':
        if (token === ']') {
          // A tag without attributes, e.g. [gallery]: the bracket closes the
          // tag and must not become part of the name.
          mode = 'COMPLETE';
        } else if (!isSpace) {
          parsedValue.name += token;
        } else if (parsedValue.name) {
          mode = 'PARSING';
        }
        break;

      case 'PARSING':
        if (token === ']') {
          mode = 'COMPLETE';
        } else if (token === '=') {
          if (!curAttribute) {
            throw ('invalid token: "' + token + '" encountered at ' + match.index);
          }
          mode = 'GET ATTRIBUTE VALUE';
        } else if (!isSpace) {
          // Names may span several tokens (e.g. data-id), so accumulate.
          curAttribute += token;
        } else if (curAttribute) {
          mode = 'SET ATTRIBUTE';
        }
        break;

      case 'SET ATTRIBUTE':
        // An attribute name followed by whitespace: either `=` comes next, or
        // the attribute has no value.
        if (token === '=') {
          mode = 'GET ATTRIBUTE VALUE';
        } else if (!isSpace) {
          // Valueless attribute: record it as '' without clobbering a value
          // set earlier under the same name.
          if (!attributes[curAttribute]) {
            attributes[curAttribute] = '';
          }
          if (token === ']') {
            curAttribute = '';
            mode = 'COMPLETE';
          } else {
            // The token starts the next attribute name.
            curAttribute = token;
            mode = 'PARSING';
          }
        }
        break;

      case 'GET ATTRIBUTE VALUE':
        if (isSpace) {
          break;
        }
        if (token === ']') {
          // `name=]`: empty value, and the bracket still closes the tag.
          attributes[curAttribute] = '';
          curAttribute = '';
          mode = 'COMPLETE';
        } else if (/["']/.test(token)) {
          quoteChar = token;
          attributes[curAttribute] = '';
          mode = 'GET QUOTED ATTRIBUTE VALUE';
        } else {
          attributes[curAttribute] = token;
          curAttribute = '';
          mode = 'PARSING';
        }
        break;

      case 'GET QUOTED ATTRIBUTE VALUE':
        if (/\\/.test(token)) {
          mode = 'ESCAPE VALUE';
        } else if (token === quoteChar) {
          mode = 'PARSING';
          curAttribute = '';
        } else {
          attributes[curAttribute] += token;
        }
        break;

      case 'ESCAPE VALUE':
        // An escaped quote or backslash contributes only the character itself;
        // anything else keeps the backslash that preceded it.
        if (token === '\\' || token === '"' || token === "'") {
          attributes[curAttribute] += token;
        } else {
          attributes[curAttribute] += '\\' + token;
        }
        mode = 'GET QUOTED ATTRIBUTE VALUE';
        break;
    }
  }

  // A name still pending at the end of input (e.g. `[tag flag]`) is a
  // valueless attribute.
  if (curAttribute && !attributes[curAttribute]) {
    attributes[curAttribute] = '';
  }
  return parsedValue;
}

/**
 * Reads the shortcode text from the #shortcode field, parses it, and shows
 * the JSON-serialized result (or the parser's error message) in #result.
 */
function doUpdate() {
  const text = document.getElementById('shortcode').value;
  let output;
  try {
    output = parseShortCode(text);
  } catch (err) {
    // parseShortCode throws plain strings, which are shown as-is. An Error
    // object would serialize to "{}", so show its message instead.
    output = err instanceof Error ? err.message : err;
  }

  // textContent rather than innerHTML: the JSON echoes text typed by the
  // user, which must be displayed literally and never interpreted as markup.
  document.getElementById('result').textContent = JSON.stringify(output);
}

// Re-parse whenever the Update button is clicked...
const updateButton = document.getElementById('updateBtn');
updateButton.addEventListener("click", doUpdate);

// ...and once up front so the initial shortcode is rendered right away.
doUpdate();

Short Code:
<!-- Demo UI: shortcode input, a button that re-runs the parser, and the JSON output. -->
<textarea id="shortcode" style="width:100%; height:60px">[wp-form id="1946" title="My Test Form"]</textarea>
<div>
  <button type="button" id="updateBtn">Update</button>
</div>
<div>
  <pre id="result"></pre>
</div>

I'm sure this has bugs, but I got it to work with your case, and some cases the other answers couldn't handle. Unless shortcodes get really intense, I'd just stick with a regex. But if you encounter stuff like unquoted attribute values and empty attributes this might work for you.

Hi @daniel-gimenez. Thanks! I changed your code a bit so that it also captures the content of the tag. Example: `parseShortCode('[embed width=130]https://www.youtube.com/embed/nH5bnMAz6NM[/embed]')`. See here: https://gist.github.com/jeffdrumgod/db908ac2e4623a3f586f60a119c9b772 — jeff_drumgod, Jul 19 '17 at 15:05

score 1 · Answer 3 · answered Jun 01 '16 at 19:20

Don't try to use String.prototype.split in this case, describe an attribute with its value and build a pattern to match them using RegExp.prototype.exec:

// Group 1 captures the attribute name, group 2 the text between the double quotes.
const re = /([\w-]+)="([^"]*)"/g;
const str = '[wp-form id="1946" title="My Test Form"]';

// Walk every name="value" pair and print the name and value on separate lines.
for (const [, attrName, attrValue] of str.matchAll(re)) {
    console.log(`${attrName}\n${attrValue}\n`);
}

score 0 · Answer 4 · answered Jun 01 '16 at 19:12

0

This can be done simply with a regex:

const shortcode = '[wp-form id="1946" title="My Test Form"]';

// Look each attribute up on its own so the result does not depend on the
// order of the attributes. The lookbehind stops `id` from matching the tail
// of a longer name (e.g. grid="..." or data-id="..."), and `?.` yields
// undefined for a missing attribute instead of throwing on a null match.
const id = /(?<![\w-])id="(.*?)"/.exec(shortcode)?.[1];
const title = /(?<![\w-])title="(.*?)"/.exec(shortcode)?.[1];

answered Jun 01 '16 at 19:12

oneLeggedChicken

301
2
6

Javascript to pull attributes from shortcode string

4 Answers