-2

I have some regex code, used in a jQuery plugin, which replaces my HTML code with an empty string.

// Quoted from the plugin. For single-line input, the first replace() strips
// everything up to and including the last '#', and the second strips
// everything from the first '?' to the end of the string.
// As written here the result is not assigned to anything, and replace()
// returns a new string rather than modifying `hash`.
hash.replace(/^.*#/, '').replace(/\?.*$/, '');

What should I change here so that it does not replace my HTML code with an empty string? For example, I have this HTML code:

"Jquery is a scripting language.<br>
Most widely used language"

This text contains a break tag, so it is replaced by an empty string.

user94559
  • 59,196
  • 6
  • 103
  • 103
neha
  • 1
  • 1

2 Answers2

0

Please try the following code:

 // Removes every tag-like sequence (a '<', one or more characters that are
 // not '>', then a '>') from `hash` and assigns the result back to `hash`.
 // This strips all tags, including <br>.
 hash = hash.replace(/(<([^>]+)>)/ig, '');

FYI, the replace method does not modify the original string; you must assign its return value back to the variable to keep the change.

mdameer
  • 1,540
  • 2
  • 14
  • 17
0

Rather than using regular expressions, I'd strongly recommend using the DOM API to parse, and remove, HTML elements using a white-list of elements that may be retained:

/**
 * Removes HTML elements from a string of HTML, keeping only the elements
 * whose tag-names appear in a white-list. The children (including text) of
 * a removed element are kept in place of that element.
 *
 * @param {Object} [opts] - Options that override the defaults.
 * @param {string} [opts.html=null] - String of text/HTML from which the
 *   HTML elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - The HTML elements that
 *   are permitted to remain; each entry may be written as 'br' or '<br>'.
 * @returns {string|undefined} The trimmed, filtered HTML; undefined when
 *   opts.html is missing, empty, or not a String.
 *
 * Only elements are filtered: attributes on retained elements are left
 * untouched.
 * NOTE(review): the input is parsed by assigning it to innerHTML of a
 * detached <div>; confirm that is acceptable for untrusted input.
 */
function stripHTML(opts) {
  // The default settings for the function, can be overridden
  // by the user:
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };

  // creating an element for containing the supplied String
  // of content in order for it to be parsed:
  const temp = document.createElement('div');

  // Iterating over the keys of the opts Object if one has
  // been supplied, otherwise we iterate over the empty
  // object-literal to prevent an error being thrown:
  Object.keys(opts || {}).forEach(function(key) {

    // here we update the settings Object with the
    // properties, and property-values, from the
    // opts Object (if supplied):
    settings[key] = opts[key];
  });

  // if we have a settings.html property-value, and
  // settings.html is a String:
  if (settings.html && 'string' === typeof settings.html) {

    // assign the settings.html String as the innerHTML of
    // the created-element:
    temp.innerHTML = settings.html;

    // retrieve all elements from the created-element using
    // the universal selector ('*') from CSS and converting
    // the resulting Array-like collection into an Array,
    // using Array.from():
    const elementNodes = Array.from(temp.querySelectorAll('*'));

    // here we ensure that the Array of elements is of the
    // type ['h1','span'] not ['<h1>','<span>'] by removing
    // all ('g') incidences of '<' or ('|') '>' from each
    // entry of settings.allowedHTML:
    const allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    // iterating over the elementNodes Array:
    elementNodes.forEach(function(node) {

      // caching a reference to the parentNode of the
      // current element; this is read at iteration time
      // because removing an ancestor earlier in the loop
      // re-parents its children:
      const parent = node.parentNode;

      // if the node's localName is not found in the
      // *normalised* Array of allowed HTML (checking the raw
      // settings.allowedHTML here would make entries such as
      // '<br>' never match):
      if (allowedHTML.indexOf(node.localName) === -1) {

        // while the node has a firstChild:
        while (node.firstChild) {
          // we insert that firstChild into the
          // node's parentNode ahead of the node itself:
          parent.insertBefore(node.firstChild, node);
        }

        // removing the (now empty) node from the parent:
        parent.removeChild(node);
      }
    });

    // here we return the innerHTML of the created-element,
    // having trimmed its leading and trailing white-space:
    return temp.innerHTML.trim();
  }
}

// Demo: with the default white-list (['h2', 'br']) the <br> is retained.
console.log(
  stripHTML({
    html: "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)"
  })
);
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)

/**
 * Strips HTML elements from a string, retaining only white-listed tags.
 * A removed element is replaced by its own child nodes.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - The text/HTML to filter.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Tag-names allowed to
 *   remain, written either as 'br' or '<br>'.
 * @returns {string|undefined} The trimmed result, or undefined when
 *   opts.html is missing, empty, or not a String.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  const temp = document.createElement('div');

  // Copy any caller-supplied options over the defaults.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });

  if (settings.html && 'string' === typeof settings.html) {
    temp.innerHTML = settings.html;
    const elementNodes = Array.from(temp.querySelectorAll('*'));

    // Normalise '<br>' to 'br' so entries compare equal to localName.
    const allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      const parent = node.parentNode;

      // Compare against the normalised list; the raw settings.allowedHTML
      // would never match bracketed entries such as '<br>'.
      if (allowedHTML.indexOf(node.localName) === -1) {

        // Hoist the children in front of the node, then drop the node.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}

// Demo: default white-list; the trailing space in the input is trimmed.
console.log(
  stripHTML({
    html: "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). "
  })
);

JS Fiddle demo.

The above also accepts an empty array for allowedHTML, which causes the function to remove all HTML tags (based on somewhat limited testing):

// Demo: an empty white-list removes every element, including <br>.
console.log(
  stripHTML({
    html: "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). ",
    allowedHTML: []
  })
);
// => jQuery is a JavaScript library.And is the most widely-used such library (at this time).

/**
 * Filters a string of HTML down to a white-list of permitted elements.
 * Elements that are not permitted are unwrapped: their child nodes stay,
 * the element itself is removed.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - The text/HTML to filter.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Permitted tag-names
 *   ('br' and '<br>' are both accepted); an empty Array removes all elements.
 * @returns {string|undefined} The trimmed result, or undefined when
 *   opts.html is missing, empty, or not a String.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  const temp = document.createElement('div');

  // Caller-supplied options replace the defaults key by key.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });

  if (settings.html && 'string' === typeof settings.html) {
    temp.innerHTML = settings.html;
    const elementNodes = Array.from(temp.querySelectorAll('*'));

    // Strip any '<' / '>' so that '<br>' and 'br' are treated alike.
    const allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      const parent = node.parentNode;

      // Must consult the normalised list, not settings.allowedHTML,
      // otherwise bracketed entries are silently ignored.
      if (allowedHTML.indexOf(node.localName) === -1) {

        // Move each child in front of the node to preserve order.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}

// Demo: passing an empty allowedHTML Array strips all elements.
console.log(
  stripHTML({
    html: "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).",
    allowedHTML: []
  })
);

JS Fiddle demo.

Seems to cope reliably – insofar as any browser is capable of coping with – invalid HTML, such as unopened elements or elements which 'overlap' (the closing tag for the first-opened element appears before the closing tag for the second-opened element):

// Demo: overlapping (invalid) markup; <div> and <h1> are not white-listed
// by default, <br> is.
console.log(
  stripHTML({
    html: "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
  })
);
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).

/**
 * Removes every element that is not white-listed from a string of HTML,
 * leaving the removed elements' contents in place.
 *
 * The string is parsed by assigning it to the innerHTML of a <div>, so the
 * function operates on whatever tree that assignment produces for invalid
 * or overlapping markup.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - The text/HTML to filter.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Permitted tag-names,
 *   with or without surrounding angle brackets.
 * @returns {string|undefined} The trimmed result, or undefined when
 *   opts.html is missing, empty, or not a String.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  const temp = document.createElement('div');

  // Merge the caller's options over the defaults.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });

  if (settings.html && 'string' === typeof settings.html) {
    temp.innerHTML = settings.html;
    const elementNodes = Array.from(temp.querySelectorAll('*'));

    // '<h2>' becomes 'h2' so it can be compared with node.localName.
    const allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      // Read the parent now: unwrapping an ancestor earlier in this loop
      // changes which node is this element's parent.
      const parent = node.parentNode;

      // Use the normalised list; checking settings.allowedHTML directly
      // would ignore entries supplied with angle brackets.
      if (allowedHTML.indexOf(node.localName) === -1) {

        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}

// Demo: overlapping tags in the input, filtered with the default white-list.
console.log(
  stripHTML({
    html: "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
  })
);

JS Fiddle demo.

It also seems to manage with (ridiculous) nesting:

// Demo: deeply nested markup, filtered with the default white-list.
console.log(
  stripHTML({
    html: "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
  })
);

/**
 * Unwraps every element of an HTML string that is not on the white-list,
 * keeping the element's contents where the element used to be.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - The text/HTML to filter.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Tag-names that may
 *   remain, with or without angle brackets.
 * @returns {string|undefined} The trimmed result, or undefined when
 *   opts.html is missing, empty, or not a String.
 */
function stripHTML(opts) {
  const settings = { html: null, allowedHTML: ['h2', 'br'] };
  const container = document.createElement('div');

  // Overlay whatever the caller passed on top of the defaults.
  for (const key of Object.keys(opts || {})) {
    settings[key] = opts[key];
  }

  // Nothing to do unless html is a non-empty String.
  if (!settings.html || typeof settings.html !== 'string') {
    return undefined;
  }

  container.innerHTML = settings.html;
  const elements = Array.from(container.querySelectorAll('*'));

  // Accept both 'br' and '<br>' by dropping any angle brackets.
  const permitted = settings.allowedHTML.map((tag) => tag.replace(/<|>/g, ''));

  for (const element of elements) {
    const owner = element.parentNode;
    if (permitted.indexOf(element.localName) !== -1) {
      continue;
    }

    // Lift the children out in order, then discard the empty shell.
    let child = element.firstChild;
    while (child) {
      owner.insertBefore(child, element);
      child = element.firstChild;
    }
    owner.removeChild(element);
  }

  return container.innerHTML.trim();
}

// Demo: the same deeply nested input, run against the function above.
console.log(
  stripHTML({
    html: "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
  })
);

JS Fiddle demo.

I cannot, though, guarantee that this works, will work, or is able to work, against people inserting <script> elements in the stripHTML function's html string, such as:

// Demo: a <script> element in the input. The closing tag is split into two
// string literals ("</" + "script>").
console.log(
  stripHTML({
    html: "<script>alert('Will this work?'); console.log('Maybe not?');</" + "script>"
  })
);
// => alert('Will this work?'); console.log('Maybe not?');

// In my (again: limited) testing the script is not executed, and
// the inserted or resulting string is never evaluated (no eval()),
// so it should be safe. This is not a guarantee, so
// please: test your edge cases

/**
 * Returns the supplied HTML with every non-white-listed element removed.
 * The text and child nodes of a removed element (for example the source
 * text inside a <script>) are kept in the output.
 *
 * @param {Object} [opts] - Overrides for the default settings.
 * @param {string} [opts.html=null] - The text/HTML to filter.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Permitted tag-names,
 *   written either as 'br' or '<br>'.
 * @returns {string|undefined} The trimmed result, or undefined when
 *   opts.html is missing, empty, or not a String.
 *
 * NOTE(review): attributes on retained elements are not inspected, and the
 * input is parsed via innerHTML on a detached <div>; verify both against
 * your threat model before passing untrusted input.
 */
function stripHTML(opts) {
  const settings = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  const temp = document.createElement('div');

  // Apply the caller's overrides, if any.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });

  if (settings.html && 'string' === typeof settings.html) {
    temp.innerHTML = settings.html;
    const elementNodes = Array.from(temp.querySelectorAll('*'));

    // Drop angle brackets so '<br>' and 'br' both match localName.
    const allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      const parent = node.parentNode;

      // Check the normalised list rather than settings.allowedHTML,
      // which may still contain bracketed entries.
      if (allowedHTML.indexOf(node.localName) === -1) {

        // Keep the node's contents by moving them ahead of it.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}

// Demo: <script> input; the closing tag is built from two string literals.
console.log(
  stripHTML({
    html: "<script>alert('Will this work?'); console.log('Maybe not?');</"+"script>"
  })
);

JS Fiddle demo.

References:

David Thomas
  • 249,100
  • 51
  • 377
  • 410