Using Jeff Moden's 8K Tally-Ho CSV splitter
create function [dbo].[DelimitedSplit8K](
@pString varchar(8000) --WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
,@pDelimiter char(1)
)
returns table with schemabinding as
return
with E1(N) AS (
select N from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1) )E1(N)
), --10E+1 or 10 rows
E2(N) as (select 1 from E1 a cross join E1 b), --10E+2 or 100 rows
E4(N) as (select 1 from E2 a cross join E2 b), --10E+4 or 10,000 rows max
cteTally(N) as (
select top (isnull(datalength(@pString),0))
row_number() over (order by (select null))
from E4
),
start(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
select 1 union all
select t.N+1 from cteTally t where substring(@pString,t.N,1) = @pDelimiter
),
Len(N1,L1) as(--==== Return start and length (for use in substring)
select s.N1,
isnull(nullif(charindex(@pDelimiter,@pString,s.N1),0) - s.N1,8000)
from start s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
select
ItemNumber = row_number() over(order by l.N1),
Item = substring(@pString, l.N1, l.L1)
from Len l
;
go
and sample data as suggested in a CTE
with
data as (
select id,keyword from (values
--id keyword
(1, 'my name is john')
,(2, 'im mike')
,(3, 'david cameron')
,(4, 'electra')
,(5, 'goliath is my name')
,(6, 'michael jordan')
,(7, 'michael jordan')
)data(id,keyword)
),
this SQL
strings as (
select
data.id
,keyword
,s.Item
,s.ItemNumber
from data
cross apply dbo.DelimitedSplit8K(data.keyword,' ') as s
)
select
strings.id
from strings
where len(strings.item) > 0 -- remove zero-length string and thus doubled-up delimters
group by strings.id
having count(itemnumber) <=3;
yields as desired:
id
-----------
2
3
4
6
7
Note that the doubled-up spaces in id=7 are compressed by the removal of zero-length strings.