In my application I need to send and receive HTML in string form. To keep things safe, I need to check that the DOM elements in the string match a list of allowed tags, that the style declarations are valid, and that there are no injected scripts. The first thing that comes to mind is, of course, running regular expressions over the string, but this is tedious, likely to be buggy, and certainly inefficient. The second idea is to use something called XPath, but even though I've read some material on the MDN site, I still have no idea how to implement this sample code:
// Alias for the XPath result-type constants, read from the `Components.interfaces`
// table; the filter further down passes XPathResult.ANY_TYPE to document.evaluate().
// NOTE(review): presumably this file runs as privileged Mozilla/Gecko code where
// `Components` is defined -- confirm before reusing it in a plain web page.
const XPathResult = Components.interfaces.nsIDOMXPathResult;
// Whitelists: element names, CSS property names and attribute names that are
// considered safe. Everything not listed here is rejected.
const ALLOWED_TAGS = ['div', 'span', 'b', 'i', 'u', 'br', 'font', 'img'];
const ALLOWED_STYLES = ['font-weight', 'font-size', 'font-family', 'text-decoration', 'color', 'background-color'];
const ALLOWED_ATTRIBUTES = ['style', 'name'];

/**
 * Builds the body of an XPath predicate that is true for every node whose
 * name() is NOT one of the given names.
 *
 * Each name is compared in its given spelling and in its upper-case spelling
 * only; any other casing (e.g. "Div") is treated as not whitelisted.
 *
 * @param {Array} names whitelisted node names
 * @return {String} XPath boolean expression, terms joined with " and "
 */
function buildNameExclusionPredicate(names) {
    return names.map(function (v) {
        return "name() != '" + v + "' and name() != '" + v.toUpperCase() + "'";
    }).join(' and ');
}

const XPATH_PART_TAGS = buildNameExclusionPredicate(ALLOWED_TAGS);
const XPATH_PART_ATTRS = buildNameExclusionPredicate(ALLOWED_ATTRIBUTES);

// NOTE(review): an XPath expression starting with "//" is resolved from the root
// of the document that contains the context node, so these queries inspect the
// whole document rather than only the subtree of the context element -- confirm
// that callers always pass the root element of the document to be checked.

// Selects every element that is outside the XHTML namespace or not whitelisted.
const XPATH_BAD_TAGS = "//*[(namespace-uri() != 'http://www.w3.org/1999/xhtml') or (" + XPATH_PART_TAGS + ")]";
// Selects every attribute that has a foreign (non-empty, non-XHTML) namespace or is not whitelisted.
const XPATH_BAD_ATTRIBUTES = "//@*[((namespace-uri() != 'http://www.w3.org/1999/xhtml') and (namespace-uri() != '')) or (" + XPATH_PART_ATTRS + ")]";
// Selects every style attribute. The upper-case spelling is included because the
// attribute whitelist above lets "STYLE" through as well; without it such an
// attribute would be accepted without its declarations ever being inspected.
const XPATH_STYLE = "//@*[name() = 'style' or name() = 'STYLE']";
/**
 * Checks if the value part of a single CSS declaration is considered secure.
 *
 * Rejects backslash escapes and comments (both can disguise keywords) and any
 * functional notation other than the rgb()/rgba()/hsl()/hsla() color functions,
 * which blocks constructs such as url(...) and expression(...).
 *
 * @param {String} value trimmed declaration value, e.g. "rgb(255, 0, 0)"
 * @return {bool} true if the value is acceptable, false otherwise
 */
function isStyleValueSecure(value) {
    var lowered = value.toLowerCase(),
        withoutColorFunctions;
    if (lowered.indexOf('\\') !== -1 || lowered.indexOf('/*') !== -1) {
        return false;
    }
    // Drop the opening of whitelisted color functions; any "(" that is still
    // left afterwards belongs to a function that is not allowed.
    withoutColorFunctions = lowered.replace(/(^|[\s,])(?:rgba?|hsla?)\(/g, '$1');
    return withoutColorFunctions.indexOf('(') === -1;
}

/**
 * Checks if inline style definition is considered secure
 *
 * The definition is split on ';' into declarations; empty declarations are
 * skipped. A declaration is rejected when it has no ':' separator, when its
 * property name (compared lower-cased) is not listed in ALLOWED_STYLES, or
 * when its value fails isStyleValueSecure().
 *
 * @param {String} styleValue value of style attribute
 * @return {bool} true if every declaration is acceptable, false otherwise
 */
function isStyleSecure(styleValue) {
    var declarations = styleValue.split(';'),
        declaration,
        separatorIndex,
        name, value,
        i, l;
    for (i = 0, l = declarations.length; i < l; i++) {
        declaration = declarations[i].trim();
        if (declaration === '') {
            continue;
        }
        // Split on the first ':' only, so the complete value gets inspected.
        separatorIndex = declaration.indexOf(':');
        if (separatorIndex === -1) {
            return false;
        }
        name = declaration.slice(0, separatorIndex).trim().toLowerCase();
        value = declaration.slice(separatorIndex + 1).trim();
        if (ALLOWED_STYLES.indexOf(name) === -1) {
            return false;
        }
        if (!isStyleValueSecure(value)) {
            return false;
        }
    }
    return true;
}
/**
 * Singleton that verifies if given XHTML document fragment is considered secure.
 * Uses whitelist-based checks on tag names, attribute names and document namespaces.
 *
 * @class
 * @namespace core.SecurityFilter.MessageSecurityFilter
 */
var MessageSecurityFilter = {
    /**
     * Checks if given document fragment is safe
     *
     * Runs three XPath queries through the element's owner document, using the
     * element as context node: one for disallowed tags, one for disallowed
     * attributes and one that collects style attributes, whose values are then
     * passed to isStyleSecure(). The first violation found ends the check.
     *
     * @param {nsIDOMElement} element root element of the XHTML document fragment to analyze
     * @return {bool} true if fragment is safe, false otherwise
     */
    isSecure: function SecurityFilter_isSecure(element) {
        var ownerDoc = element.ownerDocument,
            result,
            attr;

        // Any match at all means a non-whitelisted tag is present.
        result = ownerDoc.evaluate(XPATH_BAD_TAGS, element, null, XPathResult.ANY_TYPE, null);
        if (result.iterateNext()) {
            return false;
        }

        // Same for attributes: a single match is enough to reject.
        result = ownerDoc.evaluate(XPATH_BAD_ATTRIBUTES, element, null, XPathResult.ANY_TYPE, null);
        if (result.iterateNext()) {
            return false;
        }

        // Style attributes are allowed by name, so their content is checked separately.
        result = ownerDoc.evaluate(XPATH_STYLE, element, null, XPathResult.ANY_TYPE, null);
        while ((attr = result.iterateNext())) {
            if (!isStyleSecure(attr.nodeValue)) {
                return false;
            }
        }
        return true;
    }
};
My very first idea was to create a documentFragment and then check its nodes, either with a TreeWalker or by simply walking the DOM tree with .firstChild etc. But I guess this solution is unsafe, as it would leave me open to any injected scripts. Am I right?
Is there any other way?