I was looking for a generic solution that handled all XML Escape sequences, invalid and valid alike, in a NVARCHAR and replaced them with the character values. I wrote this up as I could not find an example that did not involve enumerating each one with then thousands REPLACE
statements (at least 65535), and got it working. If there is a better way to do it (non-CLR), I'd like to know about it.
/* Converts '0F0F' to 0x0F0F */
CREATE FUNCTION dbo.GetHex(@input nvarchar(max))
RETURNS varbinary(max)
AS
BEGIN
RETURN CONVERT(varbinary(max), @input, 2)
END
GO
/* Converts '0F0F' to the nchar \u0F0F */
CREATE FUNCTION dbo.GetNcharHex(@input nvarchar(max))
RETURNS nchar(1)
AS
BEGIN
RETURN NCHAR(dbo.GetHex(@input))
END
GO
/* Converts '123' to the nchar \u7B (decimal 123 in hex) */
CREATE FUNCTION dbo.GetNcharDec(@input nvarchar(max))
RETURNS nchar(1)
AS
BEGIN
RETURN NCHAR(CONVERT(int, @input))
END
GO
/* Replaces a group of @n nchars inside @prefix and @suffix with the of the @n nchars interpreted as hexadecimal; if @prefix = '&#x', @suffix = ';', and @n = 2 then it will replace all 2 digit hex XML entities: e.g. 'R' with 'R' */
CREATE FUNCTION dbo.ReplaceNGroupsHex(@input nvarchar(max), @prefix nvarchar(max), @group nvarchar(max), @suffix nvarchar(max), @n int = 2)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE @pattern nvarchar(max), @location int
SET @pattern = '%' + @prefix + REPLICATE(@group, @n) + @suffix + '%'
WHILE(1=1)
BEGIN
SET @location = PATINDEX(@pattern, @input)
IF (@location = 0) BREAK
SET @input = REPLACE(@input, SUBSTRING(@input, @location, LEN(@prefix) + @n + LEN(@suffix)), dbo.GetNcharHex(SUBSTRING(@input, @location + LEN(@prefix), @n)))
END
RETURN @input
END
GO
/* Replaces a group of @n nchars inside @prefix and @suffix with the of the @n nchars interpreted as decimal; if @prefix = '&#', @suffix = ';', and @n = 2 then it will replace all 2 digit decimal XML entities: e.g. '!' with '!' */
CREATE FUNCTION dbo.ReplaceNGroupsDec(@input nvarchar(max), @prefix nvarchar(max), @group nvarchar(max), @suffix nvarchar(max), @n int = 1)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE @pattern nvarchar(max), @location int
SET @pattern = '%' + @prefix + REPLICATE(@group, @n) + @suffix + '%'
WHILE(1=1)
BEGIN
SET @location = PATINDEX(@pattern, @input)
IF (@location = 0) BREAK
SET @input = REPLACE(@input, SUBSTRING(@input, @location, LEN(@prefix) + @n + LEN(@suffix)), dbo.GetNcharDec(SUBSTRING(@input, @location + LEN(@prefix), @n)))
END
RETURN @input
END
GO
/* Replaces all Hexadecimal XML entities of @n length or lower (grouped in pairs); @n = 4 will result in 'AA' => 'AA', and @n = 2 will result in 'AA' => 'AA' */
CREATE FUNCTION dbo.ReplaceHexEntities(@input nvarchar(max), @n int = 4)
RETURNS nvarchar(max)
AS
BEGIN
WHILE(@n > 0)
BEGIN
SET @input = dbo.ReplaceNGroupsHex(@input, N'&#x', '[0-9,A-F,a-f]', ';', @n)
SET @n = @n - 2
END
RETURN @input
END
GO
/* Replaces all Decimal XML entities of @n length or lower; @n = 5 will result in '↑J' => '↑J', and @n = 2 will result in 'vF' => 'vF' */
CREATE FUNCTION dbo.ReplaceDecEntities(@input nvarchar(max), @n int = 5)
RETURNS nvarchar(max)
AS
BEGIN
WHILE(@n > 0)
BEGIN
SET @input = dbo.ReplaceNGroupsDec(@input, N'&#', '[0-9]', ';', @n)
SET @n = @n - 1
END
RETURN @input
END
GO
/* Replaces all XML Entities up to: \uFFFF (Hex) and \u1869F (Decimal 99999) */
CREATE FUNCTION dbo.ReplaceEntities(@input nvarchar(max))
RETURNS nvarchar(max)
AS
BEGIN
SET @input = dbo.ReplaceHexEntities(@input, DEFAULT)
SET @input = dbo.ReplaceDecEntities(@input, DEFAULT)
RETURN @input
END
GO
SELECT dbo.GetHex('00FFAA')
SELECT dbo.ReplaceNGroupsDec(N'zB', '&#', '[0-9]', ';', 5)
SELECT dbo.ReplaceHexEntities(N'zB', DEFAULT)
SELECT dbo.ReplaceDecEntities(N'zBE', DEFAULT)
SELECT dbo.ReplaceEntities(N'zBE'), LEN(dbo.ReplaceEntities(N'zBE'))