UPDATED ON 20190419 TO DEMO A SOLUTION THAT DOES NOT REQUIRE YOU TO CREATE AN NGrams8K FUNCTION
First grab a copy of NGrams8K
Remove "bad" characters:
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2;
SELECT
(
SELECT ng.token+''
FROM dbo.ngrams8k(@string,1) AS ng
WHERE ASCII(ng.token) < 127
ORDER BY ng.position
FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');
Replace "bad" characters with spaces:
SELECT
(
SELECT CASE WHEN ASCII(ng.token) < 127 THEN ng.token ELSE ' ' END+''
FROM dbo.ngrams8k(@string,1) AS ng
ORDER BY ng.position
FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');
... and for the 2017++ version if that's what you're running and you want cleaner code:
-- Remove bad characters
SELECT STRING_AGG(ng.token,'') WITHIN GROUP (ORDER BY ng.position)
FROM dbo.ngrams8k(@string,1) AS ng
WHERE ASCII(ng.token) < 127;
-- Replace bad characters
SELECT STRING_AGG(IIF(ASCII(ng.token) < 127,ng.token,' '),'') WITHIN GROUP (ORDER BY ng.position)
FROM dbo.ngrams8k(@string,1) AS ng;
UPDATED SOLUTIONS USING NGrams8K LOGIC TRANSFORMED INTO A SUBQUERY
Note my comments inside the query...
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2, @N INT = 1;
-- Remove bad characters
SELECT
(
SELECT ng.token+''
FROM
(
SELECT Position = N,
Token = SUBSTRING(@string,CAST(N AS int),@N)
FROM
(
SELECT TOP(ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@string,''))-(ISNULL(@N,1)-1)),0)))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- Order by a constant to avoid a sort
FROM
( SELECT 1 FROM (VALUES -- 90 "dummy" values used to create the CTE Tally Table
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L1(x),
( SELECT 1 FROM (VALUES -- 90*90=8100, enough for varchar(8000)
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L2(x)
) AS iTally(N)
WHERE @N > 0 AND @N <= DATALENGTH(@string)
) AS ng -- dbo.NGrams8K as an inline function
WHERE ASCII(ng.token) < 127
ORDER BY ng.position
FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');
-- Replace bad characters
SELECT
(
SELECT CASE WHEN ASCII(ng.token) < 127 THEN ng.token ELSE ' ' END+''
FROM --dbo.ngrams8k(@string,1) AS ng
(
SELECT Position = N,
Token = SUBSTRING(@string,CAST(N AS int),@N)
FROM
(
SELECT TOP(ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@string,''))-(ISNULL(@N,1)-1)),0)))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- Order by a constant to avoid a sort
FROM
( SELECT 1 FROM (VALUES -- 90 "dummy" values used to create the CTE Tally Table
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L1(x),
( SELECT 1 FROM (VALUES -- 90*90=8100, enough for varchar(8000)
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L2(x)
) AS iTally(N)
WHERE @N > 0 AND @N <= DATALENGTH(@string)
) AS ng -- dbo.NGrams8K as an inline function
ORDER BY ng.position
FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');
-- Remove bad characters using STRING_AGG (SQL 2017++)
SELECT STRING_AGG(ng.token,'') WITHIN GROUP (ORDER BY ng.position)
FROM --dbo.ngrams8k(@string,1) AS ng
(
SELECT Position = N,
Token = SUBSTRING(@string,CAST(N AS int),@N)
FROM
(
SELECT TOP(ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@string,''))-(ISNULL(@N,1)-1)),0)))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- Order by a constant to avoid a sort
FROM
( SELECT 1 FROM (VALUES -- 90 "dummy" values used to create the CTE Tally Table
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L1(x),
( SELECT 1 FROM (VALUES -- 90*90=8100, enough for varchar(8000)
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L2(x)
) AS iTally(N)
WHERE @N > 0 AND @N <= DATALENGTH(@string)
) AS ng -- dbo.NGrams8K as an inline function
WHERE ASCII(ng.token) < 127;
-- Replace bad characters using STRING_AGG (SQL 2017++)
SELECT STRING_AGG(IIF(ASCII(ng.token) < 127,ng.token,' '),'') WITHIN GROUP (ORDER BY ng.position)
FROM --dbo.ngrams8k(@string,1) AS ng
(
SELECT Position = N,
Token = SUBSTRING(@string,CAST(N AS int),@N)
FROM
(
SELECT TOP(ABS(CONVERT(BIGINT,(DATALENGTH(ISNULL(@string,''))-(ISNULL(@N,1)-1)),0)))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- Order by a constant to avoid a sort
FROM
( SELECT 1 FROM (VALUES -- 90 "dummy" values used to create the CTE Tally Table
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L1(x),
( SELECT 1 FROM (VALUES -- 90*90=8100, enough for varchar(8000)
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(N)) AS L2(x)
) AS iTally(N)
WHERE @N > 0 AND @N <= DATALENGTH(@string)
) AS ng; -- dbo.NGrams8K as an inline function