I have one email column that is having values like this 'claudio.passerini@uni.re.dit.mn.us'
. I want to take two characters strings between dot (to check for the countries and states codes).
i want result like this
col1=re,mn,us
I have one email column that is having values like this 'claudio.passerini@uni.re.dit.mn.us'
. I want to take two characters strings between dot (to check for the countries and states codes).
i want result like this
col1=re,mn,us
You can create your own function to split strings.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[fnSplitString]
(
@string NVARCHAR(MAX),
@delimiter CHAR(1)
)
RETURNS @output TABLE(splitdata NVARCHAR(MAX)
)
BEGIN
set @delimiter = coalesce(@delimiter, dbo.cSeparador());
DECLARE @start INT, @end INT
SELECT @start = 1, @end = CHARINDEX(@delimiter, @string)
WHILE @start < LEN(@string) + 1 BEGIN
IF @end = 0
SET @end = LEN(@string) + 1
INSERT INTO @output (splitdata)
VALUES(SUBSTRING(@string, @start, @end - @start))
SET @start = @end + 1
SET @end = CHARINDEX(@delimiter, @string, @start)
END
RETURN
END
Using this function you can get all your country&state codes :
select splitdata from dbo.fnSplitString('claudio.passerini@uni.re.dit.mn.us', '.')
where len(splitdata) = 2
You can modify that query to concatenate the result on a single string :
SELECT
STUFF((SELECT ',' + splitdata
FROM dbo.fnSplitString('claudio.passerini@uni.re.dit.mn.us', '.')
WHERE len(splitdata) = 2
FOR XML PATH('')), 1, 1, '')
Here is how you put it into an scalar function :
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[fnCountryCodes](@email nvarchar(max)) returns nvarchar(max)
AS
BEGIN
RETURN (SELECT
STUFF((SELECT ',' + splitdata
FROM dbo.fnSplitString(@email, '.')
WHERE len(splitdata) = 2
FOR XML PATH('')), 1, 1, ''));
END
You call it like this :
select dbo.fnCountryCodes('claudio.passerini@uni.re.dit.mn.us')
Alternatively you can create a table-valued function that returns all the 2 characters long substrings from the domain of a mail address :
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[fnCountryCodes] (@email NVARCHAR(MAX))
RETURNS @output TABLE(subdomain1 nvarchar(2), subdomain2 nvarchar(2), subdomain3 nvarchar(2), subdomain4 nvarchar(2), subdomain5 nvarchar(2))
as
BEGIN
DECLARE @subdomain1 nvarchar(2);
DECLARE @subdomain2 nvarchar(2);
DECLARE @subdomain3 nvarchar(2);
DECLARE @subdomain4 nvarchar(2);
DECLARE @subdomain5 nvarchar(2);
DECLARE CURSOR_SUBDOMAINS CURSOR FOR select splitdata from dbo.fnSplitString(@email, '.') where len(splitdata) = 2;
OPEN CURSOR_SUBDOMAINS;
FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain1;
FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain2;
FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain3;
FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain4;
FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain5;
CLOSE CURSOR_SUBDOMAINS;
DEALLOCATE CURSOR_SUBDOMAINS;
INSERT INTO @output (subdomain1, subdomain2, subdomain3, subdomain4, subdomain5)
values (@subdomain1, @subdomain2, @subdomain3, @subdomain4, @subdomain5)
RETURN
END
You use it like that :
select * from dbo.fnCountryCodes('claudio.passerini@uni.re.dit.mn.us')
If yo want individual columns you will need to pivot your data after splitting out your strings with a table valued function as per Marc's answer. If you are happy having them in rows, you can just use the select
statement inside the brackets.
declare @t table (Email nvarchar(50));
insert into @t values('claudio.passerini@uni.re.dit.mn.us'),('claudio.passerini@uni.ry.dit.mn.urg'),('claudio.passerini@uni.rn.dit.mn.uk');
select Email
,[1]
,[2]
,[3]
,[4]
,[5]
,[6]
from(
select t.Email
,s.Item
,row_number() over (partition by t.Email order by s.Item) as rn
from @t t
cross apply dbo.DelimitedSplit8K(t.Email,'.') s
where len(s.Item) = 2
) a
pivot
(
max(Item) for rn in([1],[2],[3],[4],[5],[6])
) pvt
http://www.sqlservercentral.com/articles/Tally+Table/72993/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
Solution
To do exactly what you've asked; i.e. pull back just the 2 char codes from within the email address's domain, you could use a function such as this:
create function dbo.fn_Get2AlphaCharCodesFromEmail
(
@email nvarchar(254) --max length of an email is 254: http://stackoverflow.com/questions/386294/what-is-the-maximum-length-of-a-valid-email-address
) returns nvarchar(254)
as
begin
declare @result nvarchar(254) = null
, @maxLen int = 254
;with cte(i, remainder,result) as
(
select cast(0 as int)
, cast('.' + substring(@email,charindex('@',@email)+1,@maxLen) + '.' as nvarchar(254))
, cast(null as nvarchar(254))
union all
select cast(i+1 as int)
, cast(substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+3,@maxLen)as nvarchar(254))
, cast(coalesce(result + ',','') + substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+1,2) as nvarchar(254))
from cte
where patindex('%.[A-Z][A-Z].%',remainder) > 0
)
select top 1 @result = result from cte order by i desc;
Return @result;
end
go
--demo
select dbo.fn_Get2AlphaCharCodesFromEmail ('claudio.passerini@uni.re.dit.mn.us')
--returns: re,mn,us
select dbo.fn_Get2AlphaCharCodesFromEmail ('claudio.passerini@uni.123.dit.mnx.usx')
--returns: NULL
Explanation
Create a function called fn_Get2AlphaCharCodesFromEmail
in the schema dbo
which takes a single parameter, @email
which is a string of up to 254 characters, and returns a string of up to 254 characters.
create function dbo.fn_Get2AlphaCharCodesFromEmail
(
@email nvarchar(254)
) returns nvarchar(254)
as
begin
--... code that does the work goes here
end
declare the variables we'll be using later on.
@result
holds the value we'll be returning from the function@maxLen
records the maximum length of an email; this makes it slightly easier should this length ever need to change; though not entirely simple since we have to specify the 254 length in our column & variable definitions later on anyway.
declare @result nvarchar(254) = null , @maxLen int = 254
Now comes the interesting bit. We create a common table expression with 3 columns:
i
is used to record which iteration each record was produced in; the highest value of i
is the last record to be created.remainder
is used to hold the yet-to-be processed characters from the email.result
is used to record the 2 char codes; each new row adds another value to this column's comma separated values.
;with cte(i, remainder,result) as ( --code to iterate through the email string, breaking it down, goes here )
this gives us our first row in the cte "table".
i
(i.e. the first column) with value 0
to say that this is our first row (we could choose pretty much any value here; it doesn't matterremainder
(i.e. 2nd column) as the part of the email address which follows the @
character; i.e. the email's domain.result
(i.e. 3rd column) as null
; as we've not yet found a result (i.e. a 2 char string within the email's domain)there is no from
component as we're just getting data from the @email
variable; no tables/views/etc are required.
select cast(0 as int)
, cast('.' + substring(@email,charindex('@',@email)+1,@maxLen) + '.' as nvarchar(254))
, cast(null as nvarchar(254))
union all
is used to combing the first result(s) with the results of the next (recurring) statement. NB: The CTE code before this statement is run once to give initial values; the code after is run once for each new set of rows generated.
union all
The recurring code in the CTE is applied to new rows in the CTE until no new rows are generated.
i
takes the value of the previous iteration's row's i
incremented by 1.
select cast(i+1 as int)
remainder
takes the previous iteration's remainder, and removes everything before (and including) the next 2 character code (result).
patindex('%.[A-Z][A-Z].%',remainder)
returns a number giving the location of the a string containing a dot followed by 2 letters followed by a dot, occurring anywhere in the input string
, cast(substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+3,@maxLen)as nvarchar(254))
result
uses the same logic as remainder
, only it takes the 2 characters found, rather than everything after them. These characters are added on to the end of the previous iteartion's row's result value, separated by a comma.
, cast(coalesce(result + ',','') + substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+1,2) as nvarchar(254))
the from cte
part just says that we're referencing the same "table" we're creating; i.e. this is how the recursion occurs
from cte
the where statement is used to prevent infinite recursion; i.e. once there are no more 2 char codes left in the remainder, stop looking.
where patindex('%.[A-Z][A-Z].%',remainder) > 0
Once we've found all the 2 char codes in the string, we know that the last row's result will contain the complete set; as such we assign this single row's value to the @result
variable.
select top 1 @result = result
the from statement shows we're referencing the data we created in our with cte
statement
from cte
the order by
is used to determine which record comes first (i.e. which record is the top 1
record). We want it to be the last row generated by the CTE. Since we've been incrementing i
by 1 each time, this last record will have the highest value of i
, so by sorting by i desc
(descending) that last generated row will be the row we get.
order by i desc;
Finally, we return the result generated above.
Return @result;
Alternative Approach
However, if you're trying to extract information from your emails, I'd recommend an alternate approach... have a list of values that you're looking for, and compare your email with that, without having to break apart the email address (beyond splitting on the @ to ensure you're only checking the email's domain).
declare @countryCodes table (code nchar(2), name nvarchar(64)) --you'd use a real table for this; I'm just using a table variable so this demo's throwaway code
insert into @countryCodes (code, name)
values
('es','Spain')
,('fr','France')
,('uk','United Kingdom')
,('us','USA')
--etc.
--check a single mail
declare @mail nvarchar(256) = 'claudio.passerini@uni.re.dit.mn.us'
if exists (select top 1 1 from @countryCodes where '.' + substring(@mail,charindex('@',@mail)+1,256) + '.' like '%.' + code + '.%')
begin
select name from @countryCodes where '.' + substring(@mail,charindex('@',@mail)+1,256) + '.' like '%.' + code + '.%'
end
else
begin
select 'no results found'
end
--check a bunch of mails
declare @emailsToCheck table (email nvarchar(256))
insert into @emailsToCheck (email)
values
('claudio.passerini@uni.re.dit.mn.us')
,('someone@someplace.co.uk')
,('cant.see.me@never.never.land')
,('some.fr.address.hidden@france.not.in.this.bit')
select e.email, c.name
from @emailsToCheck e
left outer join @countryCodes c
on '.' + substring(email,charindex('@',email)+1,256) + '.' like '%.' + code + '.%'
order by e.email, c.name