If you want to convert numeric HTML character references to Unicode escape sequences, try the following (doesn't work with code points above 0xFFFF):
/**
 * Converts numeric HTML character references (`&#233;`, `&#xE9;`) into
 * JavaScript `\uXXXX` escape sequences.
 *
 * Code points above 0xFFFF are written as a UTF-16 surrogate pair (two
 * escapes), because a `\uXXXX` escape holds exactly four hex digits.
 * References whose value exceeds 0x10FFFF, and anything that is not a
 * well-formed numeric reference, are left untouched.
 *
 * @param {string} string - Text that may contain numeric character references.
 * @returns {string} The text with each reference replaced by its escape sequence(s).
 */
function convertCharRefs(string) {
  // Formats one UTF-16 code unit as a four-digit escape.
  const escapeUnit = (unit) => `\\u${unit.toString(16).padStart(4, '0')}`;

  // A single pass handles both notations; the hex alternative only accepts
  // real hex digits, and the `i` flag also admits the `&#X...;` spelling.
  return string.replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
    const isHex = hex !== undefined;
    const codePoint = isHex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);

    // Beyond the Unicode range there is no character to escape.
    if (codePoint > 0x10ffff) {
      return match;
    }

    // Astral code point: split into high and low surrogates.
    if (codePoint > 0xffff) {
      const offset = codePoint - 0x10000;
      return escapeUnit(0xd800 + (offset >> 10)) + escapeUnit(0xdc00 + (offset & 0x3ff));
    }

    // Keep the digit case the author wrote, but drop redundant leading zeros
    // so the escape is always exactly four digits long.
    if (isHex) {
      return `\\u${hex.replace(/^0+(?=.)/, '').padStart(4, '0')}`;
    }

    return escapeUnit(codePoint);
  });
}
If you simply want to decode the character references:
/**
 * Decodes numeric HTML character references (`&#65;`, `&#x41;`) into the
 * characters they denote.
 *
 * All references are resolved in one pass, so text produced by decoding one
 * reference is never decoded a second time (`&#38;#x41;` yields the literal
 * text `&#x41;`, not `A`). References whose value exceeds 0x10FFFF, and
 * anything that is not a well-formed numeric reference, are left untouched
 * instead of raising a RangeError.
 *
 * @param {string} string - Text that may contain numeric character references.
 * @returns {string} The text with each valid reference replaced by its character.
 */
function decodeCharRefs(string) {
  // The hex alternative only accepts real hex digits; the `i` flag also
  // admits the `&#X...;` spelling.
  return string.replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);

    // String.fromCodePoint throws for values past the last Unicode code point.
    if (codePoint > 0x10ffff) {
      return match;
    }

    return String.fromCodePoint(codePoint);
  });
}
Both functions use String.prototype.replace
with a function as the replacement.