you should let the browser do the 'heavy lifting'; obviously, the browser can parse HTML - after all, how else would it show us web pages? You can use JavaScript to make the browser parse HTML for you by setting the .innerHTML property of some DOM node to your HTML string, or by calling .insertAdjacentHTML() on it. Either way, your HTML string has been turned into a tree of DOM nodes, i.e., it has been parsed.
And there are built-in browser ways to turn your DOM tree into an XHTML string. You simply create an XHTML document programmatically, then add any DOM tree to it with .appendChild() (the tree can come from an HTML (non-XHTML) document, that is perfectly fine), and then the .outerHTML and .innerHTML properties of your DOM tree (which now has an XHTML document as its owner document) will give XHTML.
If you're starting with a DOM node, you can use the following 2 functions:
// XHTML namespace URI; outerXHTML() and innerXHTML() pass it to createDocument().
var nsx = "http://www.w3.org/1999/xhtml";
/**
 * Serialize a DOM node, including the node itself, as XHTML markup.
 *
 * The node is appended to the root element of a freshly created XHTML
 * document, so it is removed from the document that held it before.
 * Clone the node first if it must stay where it is.
 *
 * @param {Node} node - Node to serialize; it is moved into the new document.
 * @returns {string} The node's outerHTML, read after the move.
 */
function outerXHTML(node) {
  const xhtmlRoot = document.implementation.createDocument(nsx, 'html').documentElement;
  xhtmlRoot.appendChild(node);
  return node.outerHTML;
}
/**
 * Serialize the contents of a DOM node (not the node itself) as XHTML markup.
 *
 * The node is appended to the root element of a freshly created XHTML
 * document, so it is removed from the document that held it before.
 * Clone the node first if it must stay where it is.
 *
 * @param {Node} node - Node whose children are serialized; it is moved into the new document.
 * @returns {string} The node's innerHTML, read after the move.
 */
function innerXHTML(node) {
  const xhtmlRoot = document.implementation.createDocument(nsx, 'html').documentElement;
  xhtmlRoot.appendChild(node);
  return node.innerHTML;
}
(Note that the node will be owned by the newly created XHTML document, so it will vanish from your original document. If it should remain there, clone it first, e.g. with node.cloneNode(true), and pass the clone to one of the above functions.)
And if you're starting with a string, you just have to set innerHTML of a newly created node before doing the above. For your convenience, here is a snippet with 3 examples: 2 for HTML to XHTML, and one for XHTML to HTML.
/**
 * Convert an HTML string to XHTML markup.
 *
 * The string is assigned to innerHTML of a new body element, which is then
 * appended to the root of a freshly created XHTML document before its
 * innerHTML is read back.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} innerHTML of the temporary body element after the move.
 */
function html2xhtml(html) {
  const XHTML_NS = "http://www.w3.org/1999/xhtml";
  const container = document.createElement('body');
  container.innerHTML = html;
  const xhtmlRoot = document.implementation.createDocument(XHTML_NS, 'html').documentElement;
  xhtmlRoot.appendChild(container);
  return container.innerHTML;
}
/**
 * Convert an XHTML string to HTML markup.
 *
 * The string is assigned to innerHTML of a new body element, which is then
 * appended to the root of a freshly created HTML document before its
 * innerHTML is read back.
 *
 * NOTE(review): if `document` is an HTML document, the input is presumably
 * parsed by the HTML parser, so XHTML-only constructs such as a self-closed
 * non-void element (`<div />`) may not round-trip - confirm.
 *
 * @param {string} xhtml - XHTML markup to convert.
 * @returns {string} innerHTML of the temporary body element after the move.
 */
function xhtml2html(xhtml) {
  const container = document.createElement('body');
  container.innerHTML = xhtml;
  const htmlRoot = document.implementation.createHTMLDocument().documentElement;
  htmlRoot.appendChild(container);
  return container.innerHTML;
}
// Sample inputs: two HTML fragments to convert to XHTML, one XHTML fragment to convert to HTML.
const html1 = '<div>lorem<img>ipsum<img>dolor sit amet<br></div>';
const html2 = '<ul><li><svg><rect width="100" height="100"></rect></svg></li></ul>';
const html3x = '<img />';

// Output elements, looked up by id in the same order as the original declarations.
const [node1, node1x, node2, node2x, node3, node3x] = [
  'node1',
  'node1x',
  'node2',
  'node2x',
  'node3',
  'node3x',
].map((id) => document.getElementById(id));

// Raw inputs first; textContent keeps the markup visible as text instead of rendering it.
node1.textContent = html1;
node2.textContent = html2;
node3x.textContent = html3x;

// Converted forms.
node1x.textContent = html2xhtml(html1);
node2x.textContent = html2xhtml(html2);
node3.textContent = xhtml2html(html3x);
<!-- Output slots for the demo: the script fills each <pre> via textContent, pairing every input (html / xhtml) with its converted form. -->
html<br><pre id='node1'></pre>xhtml<br><pre id='node1x'></pre><hr>
html<br><pre id='node2'></pre>xhtml<br><pre id='node2x'></pre><hr><hr>
xhtml<br><pre id='node3x'></pre>html<br><pre id='node3'></pre>
code older version
You can also do it with XMLSerializer (for the toString part, not the fromString part); credit @Kaiido.