str.replace(/\x26/g,'\x26amp;') // first turn every & into &amp; (must come first, otherwise the & of the entities produced below would be escaped again)
.replace(/\x3c/g,'\x26lt;') // next turn every < into &lt;
.replace(/\r\n|\n/g,'\x3cbr>'); // finally replace new lines (CRLF or LF) with <br>
// Note: the 'risky' characters are written as hex escapes (\x26 is &, \x3c is <)
// so that they never appear literally in the code;
// that way they are also safe for inline scripts (in older browsers).
// Also, the \xNN notation is used rather than the \uNNNN unicode notation,
// as both are the same for this purpose (and it saves some characters).
- Note: the order of replacements is important.
- Also note: this sequence does NOT end with an attempt to preserve whitespace (that is, to avoid the white-space collapsing feature of HTML) by finding an 'HTML equivalent of &nbsp; that DOES break' and replacing spaces with it, because it is much simpler to use a <pre> tag to hold the contents (which seems better for older browsers, notably IE<9) or to disable the white-space collapsing feature in CSS: div.preview {white-space: [pre|pre-wrap];}
Simple example (live jsFiddle here):
<!-- Live preview: on every keyup the textarea's value is escaped (first & to
     the amp entity, then < to the lt entity, then new lines to br tags) and
     assigned to the innerHTML of the textarea's next sibling. The closing
     textarea tag and the pre tag must stay directly adjacent: with whitespace
     between them, nextSibling would be a text node instead of the pre. -->
<textarea style="width:98%" onkeyup="
this.nextSibling.innerHTML=this.value.replace(/\x26/g,'\x26amp;')
.replace(/\x3c/g,'\x26lt;')
.replace(/\r\n|\n/g,'\x3cbr>');
"></textarea><pre></pre>
Note: you might also want to hook onchange etc. Also, you might want to use a separate function and cache the output (and input) element(s).
You could make this a lot more difficult, for example by only replacing the & when absolutely needed; the same goes for the <.
Also, you could transform all unwanted (non-'ASCII') characters to their unicode HTML-entity equivalent, but all this seems a little overkill for a simple preview, as you probably don't want to submit the user's data in this (overly effective) HTML markup. A simple example for that would have been:
.replace(/[^\u0020-\u007e]/g, function(m){return '\x26#'+m.charCodeAt(0)+';';})
Hope this gets you started.
EDIT 1: Also have a look at demo jsFiddle v2 and demo jsFiddle v3 for some more advanced examples like:
( // START HTML encoder hookup
  // Wires three elements together: e1 is the input textarea, e2 receives the
  // generated HTML source and e3 renders that HTML. The handler is assigned
  // to e1.onkeyup and also invoked once right away, so both outputs reflect
  // the initial contents of e1 before the first key press.
  function(enc, e1, e2, e3){
    (e1.onkeyup = function(){
      e3.innerHTML = e2.value = enc(e1.value);
    })();
  }( // IIFE Passing encoder and elements
    (function(){ //HTML enc
      // The regexes are created once and reused on every call. The 'risky'
      // characters & and < are written as hex escapes (\x26, \x3c) so they
      // never appear literally in the script.
      var amp=/\x26/g , lte=/\x3c/g , eol=/\r\n|\n/g
        // The first alternative matches a complete surrogate pair as ONE
        // unit, so a character outside the BMP (an emoji, for example)
        // becomes a single numeric reference. Encoding the two halves
        // separately would produce surrogate references, which are invalid
        // in HTML and get rendered as U+FFFD.
        , noASCI=/[\ud800-\udbff][\udc00-\udfff]|[^\u0020-\u007e]/g
        , toUCPd=function(m){
            var code = m.charCodeAt(0);
            if (m.length === 2) {
              // Combine high and low surrogate into the real code point.
              code = (code - 0xd800) * 0x400 + (m.charCodeAt(1) - 0xdc00) + 0x10000;
            }
            return '\x26#'+ code +';';
          }
      ;
      // Order matters: & must be escaped first, otherwise the & of the
      // entities produced by the later steps would be escaped again.
      return function(s){return(
        s.replace(amp, '\x26amp;')
         .replace(lte, '\x26lt;')
         .replace(eol, '\x3cbr>')
         .replace(noASCI, toUCPd) //optional
      );};
    }
    )() //IIFE returns HTML enc
    , document.getElementsByTagName('textarea')[0]
    , document.getElementsByTagName('textarea')[1]
    , document.getElementsByTagName('pre')[0]
  )
); // END HTML encoder hookup
/* Corresponding demo html:
Input:<br>
<textarea>	<this is a tab char & these ☺ë non-'ASCII'
continue to to type here</textarea><br>
Generated HTML code:<br>
<textarea>code output</textarea><br>
HTML output in pre:<br>
<pre>html output</pre>
*/
EDIT 2: almost forgot, you could look at the (quirky, forgotten but still valid) xmp tag. That would work until the user types </xmp. Note for older IE: replace the whole xmp element instead of setting/overwriting its innerHTML (because that doesn't work, at least in IE6). However, as noted, there are a lot of quirks to this xmp tag regarding box layout, overflow etc. across the different browsers!
` and special characters to UTF-8, is that correct? – Dalorzo Aug 30 '14 at 20:44