The original encoding is probably UTF-8.
So you can use a UDF to make it more readable.
The UDF is based on this old answer from Xabi
CREATE FUNCTION Utf8ToUcs(@src varchar(MAX))
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
DECLARE @i INT
, @res NVARCHAR(MAX)=@src
, @pi VARCHAR(18);
SELECT
@pi = '%[à-ï][€-¿][€-¿]%'
, @i = PATINDEX(@pi, @src COLLATE Latin1_General_BIN);
WHILE @i > 0
SELECT
@res = STUFF(@res,@i,3, NCHAR(
((ASCII(SUBSTRING(@src,@i,1))&31)*4096)
+((ASCII(SUBSTRING(@src,@i+1,1))&63)*64)
+(ASCII(SUBSTRING(@src,@i+2,1))&63)))
, @src = STUFF(@src,@i,3,'.')
, @i = PATINDEX(@pi,@src COLLATE Latin1_General_BIN);
SELECT
@pi = '%[Â-ß][€-¿]%'
, @i = PATINDEX(@pi,@src COLLATE Latin1_General_BIN);
WHILE @i > 0
SELECT
@res = STUFF(@res,@i,2,NCHAR(
((ASCII(SUBSTRING(@src,@i,1))&31)*64)
+(ASCII(SUBSTRING(@src,@i+1,1))&63)))
, @src = STUFF(@src,@i,2,'.')
, @i = PATINDEX(@pi,@src COLLATE Latin1_General_BIN);
RETURN @res;
END;
select col
, dbo.Utf8ToUcs(col) as col_ucs
from test
col |
col_ucs |
Alecta Pensionsförsäkring, ömsesidigt |
Alecta Pensionsförsäkring, ömsesidigt |
Aelefónica, S.A. |
Aelefónica, S.A. |
Áæâàéêèeeeëöu |
Áæâàéêèeeeëöu |
And then you can also remove the accents.
Here's it's done via a collation. (solution taken from here)
select col
, cast(dbo.Utf8ToUcs(col) as varchar(100)) Collate SQL_Latin1_General_CP1253_CI_AI as col_cp1253
from test
col |
col_cp1253 |
Alecta Pensionsförsäkring, ömsesidigt |
Alecta Pensionsforsakring, omsesidigt |
Aelefónica, S.A. |
Aelefonica, S.A. |
Áæâàéêèeeeëöu |
A?aaeeeeeeeou |
Demo on db<>fiddle here