I think the best bet you have here is to maintain a table of misspellings that you want to find and replace within your data. This can either be done with a static reference table (recommended) or if you can't add tables, then a table variable within a procedure will suffice.
With this list of individual words, you can then loop through your data with a recursive cte to replace each misspelling in turn and return the final result. With this approach, you only need to maintain the list of individual words and not entire sentences as per some of the other answers proposed. You will need to insert your data into a temp table so as to retain the row_number
s and count
s in the same order, in case you have duplicate rows.
Output from query below:
TextValue | NewTextValue
----------------------+----------------------
gov primery skool | govt primary school
Not so incorect name | Not so incorrect name
-- Create data to use.
declare @Data table (TextValue nvarchar(50));
declare @Lookup table (IncorrectValue nvarchar(50),CorrectValue nvarchar(50));
insert into @Data values
('gov primery skool')
,('Not so incorect name');
insert into @Lookup values
('gov','govt')
,('primery','primary')
,('skool','school')
,('incorect','incorrect');
-- Actual query.
if object_id('tempdb..#DataRN') is not null
drop table #DataRN;
create table #DataRN (rn int
,cnt int
,cntrn int
,TextValue nvarchar(50)
,IncorrectValue nvarchar(50)
,CorrectValue nvarchar(50)
);
-- Create temp table that holds source data as well as some Row Numbers and Counts by TextValue to determine what order to retreive rows and to know which is the final row to pull out at the end.
insert into #DataRN
select row_number() over (order by d.TextValue, l.IncorrectValue, l.CorrectValue) as rn -- Provides order to select rows in cte below.
,row_number() over (partition by d.TextValue
order by d.TextValue, l.IncorrectValue, l.CorrectValue) as cntrn -- Provides ordering within each TextValue, to be compared with
,count(*) over (partition by d.TextValue) as cnt -- The total number of rows returned to complete all REPLACEs on the TextValue to find the last, and therefore complete, NewTextValue in cte.
,d.TextValue
,l.IncorrectValue
,l.CorrectValue
from @Data d
left join @Lookup l
on(d.TextValue like '%' + l.IncorrectValue + '%');
-- Recursive cte to apply each find and replace in order.
with cte as
(
select d.rn
,d.cnt
,d.cntrn
,d.TextValue
,cast(replace(d.TextValue,d.IncorrectValue,d.CorrectValue) as nvarchar(50)) as NewTextValue
from #DataRN d
where rn = 1
union all
select d.rn
,d.cnt
,d.cntrn
,d.TextValue
,cast(replace(case when d.TextValue = c.TextValue
then c.NewTextValue
else d.TextValue
end
,d.IncorrectValue
,d.CorrectValue
) as nvarchar(50)) as NewTextValue
from #DataRN d
inner join cte c
on(d.rn = c.rn+1)
)
select TextValue
,NewTextValue
from cte
where cnt = cntrn -- Where these two values are the same, we know that is the finished product.
order by 1;
if object_id('tempdb..#DataRN') is not null
drop table #DataRN;