0

I have the following data in a table in SQL Server 2008 R2:

 ID        Code    StartDate     EndDate
 10001     3       2014-07-25    2014-07-28
 10001     3       2014-07-29    2014-10-06
 10001     3       2014-10-07    2014-10-10
 10001     1       2014-10-11    2014-10-31
 10001     1       2014-11-01    2014-11-15
 10001     3       2014-11-16    2014-11-25
 10001     3       2014-11-26    NULL
 20002     3       2014-07-25    2014-07-28
 20002     3       2014-07-29    2014-10-06
 20002     3       2014-10-07    NULL
 30003     3       2014-07-25    2014-11-13
 30003     3       2014-11-14    2014-11-24
 30003     2       2014-11-25    NULL

I want to "collapse" any records with the same Code, and adjacent EndDate and StartDate. The results should be:

 ID        Code    StartDate     EndDate
 10001     3       2014-07-25    2014-10-10    
 10001     1       2014-10-11    2014-11-15
 10001     3       2014-11-16    NULL
 20002     3       2014-07-25    NULL
 30003     3       2014-07-25    2014-11-24
 30003     2       2014-11-25    NULL

I've been trying to use various sub-queries and the ROW_NUMBER() function, but just can't get it to work. I suspect this would be easily done with a CTE, but I haven't been able to wrap my head around how those work in order to try it here. Any ideas?

DakotaPaul
  • 195
  • 15
  • Interesting! Have you had a look at this blog on your search: http://sqlblogcasts.com/blogs/sqlandthelike/archive/2009/08/27/sql-and-contiguous-data-ranges.aspx – Peter Healy Dec 31 '14 at 00:39
  • If you do have any option to use SQL 2012 or later, this is fairly easy. – Steve Ford Dec 31 '14 at 11:17

4 Answers4

0

Try this,

CREATE TABLE #TEMP
(
    ID INT,
    CODE INT,
    STARTDATE DATE,
    ENDDATE DATE
)

INSERT INTO #TEMP VALUES
(10001,3,'2014-07-25','2014-07-28'),
(10001,3,'2014-07-29','2014-10-06'),
(10001,3,'2014-10-07','2014-10-10'),
(10001,1,'2014-10-11','2014-10-31'),
(10001,1,'2014-11-01','2014-11-15'),
(10001,3,'2014-11-16','2014-11-25'),
(10001,3,'2014-11-26',NULL),
(20002,3,'2014-07-25','2014-07-28'),
(20002,3,'2014-07-29','2014-10-06'),
(20002,3,'2014-10-07',NULL),
(30003,3,'2014-07-25','2014-11-13'),
(30003,3,'2014-11-14','2014-11-24'),
(30003,2,'2014-11-25',NULL)


SELECT T1.ID,T1.CODE,T1.STARTDATE,A.ENDDATE FROM (SELECT L.ID,L.CODE,MIN(STARTDATE) AS STARTDATE,DIFF FROM (SELECT ID,
       CODE,
       STARTDATE,
       ENDDATE,
       IsNull(Lag(CODE, 2)
                OVER (
                  ORDER BY ID, STARTDATE, ENDDATE), CODE) AS T_LAG,
       CODE - IsNull(Lag(CODE, 2)
                       OVER (
                         ORDER BY ID, STARTDATE, ENDDATE), CODE) AS DIFF
FROM   #TEMP ) L
GROUP BY L.ID,L.CODE,DIFF) T1
CROSS APPLY(
    SELECT ID,CODE,ENDDATE,DIFF FROM (SELECT ID,CODE,ENDDATE,DIFF,ROW_NUMBER() OVER (PARTITION BY ID,CODE,DIFF ORDER BY ID,CODE,STARTDATE DESC,ENDDATE DESC) AS T_R FROM (SELECT ID,
       CODE,
       STARTDATE,
       ENDDATE,
       IsNull(Lag(CODE, 2)
                OVER (
                  ORDER BY ID, STARTDATE, ENDDATE), CODE) AS T_LAG,
       CODE - IsNull(Lag(CODE, 2)
                       OVER (
                         ORDER BY ID, STARTDATE, ENDDATE), CODE) AS DIFF
FROM   #TEMP ) A) A
WHERE T_R=1 AND ID=T1.ID AND CODE=T1.CODE AND DIFF=T1.DIFF)A
ORDER BY T1.ID,T1.STARTDATE
PP006
  • 681
  • 7
  • 17
0

Try this (I'm replicating SQL 2012 Lead And Lag functionality using Row_Number() in 2008):

SQL Fiddle

MS SQL Server 2008 Schema Setup:

CREATE TABLE EventLog
(
ID Int,
Code tinyint,
StartDate Date,
EndDate Date Null
)

INSERT INTO EventLog
Values
(10001,     3,       '2014-07-25',    '2014-07-28'),
(10001,     3,       '2014-07-29',    '2014-10-06'),
(10001,     3,       '2014-10-07',    '2014-10-10'),
(10001,     1,       '2014-10-11',    '2014-10-31'),
(10001,     1,       '2014-11-01',    '2014-11-15'),
(10001,     3,       '2014-11-16',    '2014-11-25'),
(10001,     3,       '2014-11-26',     NULL),
(20002,     3,       '2014-07-25',    '2014-07-28'),
(20002,     3,       '2014-07-29',    '2014-10-06'),
(20002,     3,       '2014-10-07',     NULL),
(30003,     3,       '2014-07-25',    '2014-11-13'),
(30003,     3,       '2014-11-14',    '2014-11-24'),
(30003,     2,       '2014-11-25',    NULL)

Query 1:

WITH CTE
AS
(
    SELECT ID,
        Code,
        StartDate,
        EndDate,
        ROW_NUMBER() OVER (PARTITION  BY Id, Code ORDER BY Id, Code, StartDate) As RN
    FROM EventLog
),
CTE2
AS
(
    SELECT CTE.Id, CTE.Code, CTE.StartDate, CTE.EndDate, 
        CASE WHEN DATEDIFF(d, LAG.EndDate, CTE.StartDate) = 1 
             THEN Lag.EndDate 
             ELSE NULL 
        END AS PrevEndDate, 
        CASE WHEN DateDiff(d, LEAD.StartDate, CTE.EndDate) = -1 
             THEN Lead.StartDate 
             ELSE NULL 
        END As NextStartDate
    FROM CTE
    LEFT OUTER JOIN CTE AS Lag
        ON CTE.ID = Lag.ID AND CTE.Code = Lag.Code AND Lag.Rn = CTE.RN - 1
    LEFT OUTER JOIN CTE AS Lead
        ON CTE.ID = Lead.ID AND CTE.Code = Lead.Code AND Lead.Rn = CTE.RN + 1
),
StartAndEnd
As
(
    SELECT  ID, 
            Code, 
            StartDate, 
            EndDate,
            PrevEndDate,
            NextStartDate,
            ROW_NUMBER() OVER (PARTITION BY Id, Code ORDER BY ID, Code, StartDate) As RN
    FROM CTE2
    WHERE (PrevEndDate IS NULL Or NextStartDate IS NULL)
)
SELECT S.ID, s.Code, S.StartDate, E.EndDate
FROM StartAndEnd as S
LEFT JOIN StartAndEnd E
    ON S.ID = E.ID AND S.Code = E.Code AND E.RN = S.Rn + 1
WHERE S.PrevEndDate Is Null
ORDER By s.Id, S.StartDate

Results:

|    ID | CODE |  STARTDATE |    ENDDATE |
|-------|------|------------|------------|
| 10001 |    3 | 2014-07-25 | 2014-10-10 |
| 10001 |    1 | 2014-10-11 | 2014-11-15 |
| 10001 |    3 | 2014-11-16 |     (null) |
| 20002 |    3 | 2014-07-25 |     (null) |
| 30003 |    3 | 2014-07-25 | 2014-11-24 |
| 30003 |    2 | 2014-11-25 |     (null) |
Steve Ford
  • 7,433
  • 19
  • 40
0

Since your ranges are continuous, the problem essentially becomes a one. © Andriy M

Thanks to Steve Ford for the table:

declare @EventLog table
(
    Id          int,
    Code        tinyint,
    StartDate   date,
    EndDate     date null
)

insert into @EventLog
values
(10001,     3,       '2014-07-25',    '2014-07-28'),
(10001,     3,       '2014-07-29',    '2014-10-06'),
(10001,     3,       '2014-10-07',    '2014-10-10'),
(10001,     1,       '2014-10-11',    '2014-10-31'),
(10001,     1,       '2014-11-01',    '2014-11-15'),
(10001,     3,       '2014-11-16',    '2014-11-25'),
(10001,     3,       '2014-11-26',     null),
(20002,     3,       '2014-07-25',    '2014-07-28'),
(20002,     3,       '2014-07-29',    '2014-10-06'),
(20002,     3,       '2014-10-07',     null),
(30003,     3,       '2014-07-25',    '2014-11-13'),
(30003,     3,       '2014-11-14',    '2014-11-24'),
(30003,     2,       '2014-11-25',    null);

Thanks Andriy M for the solution:

declare @MaxDate date = '9999-12-31';

with cte as 
(
    select *,
      g = row_number() over (partition by Id        order by StartDate)
        - row_number() over (partition by Id, Code  order by StartDate)   
    from @EventLog
)
select
    Id, 
    Code,
    StartDate   = min(StartDate),
    EndDate     = nullif(max(isnull(EndDate, @MaxDate)), @MaxDate) 
from cte
group by
    Id, Code, g;
Community
  • 1
  • 1
Vadim Loboda
  • 2,431
  • 27
  • 44
  • Thank you, lobodava. I apologize if this breaks StackExchange etiquette, but what's the advantage of using a CTE for this rather than a subquery? – DakotaPaul Jan 02 '15 at 17:08
  • The CTE is mostly the syntactic sugar and provides more cleaner and readable SQL code. Besides that CTE allows us to use a subquery many times and make recursive queries. The question about CTE-SubQuery difference is a very popular one: http://stackoverflow.com/questions/706972/difference-between-cte-and-subquery – Vadim Loboda Jan 03 '15 at 19:40
0
;WITH StartDates
AS(
   Select e1.ID, e1.Code, e1.StartDate, ROW_NUMBER() OVER (Order By e1.ID asc) as RowNumber
   From @EventLog e1
   LEFT JOIN @EventLog e2 ON e1.Code = e2.Code and e2.EndDate = DATEADD(day,-1,e1.StartDate)
   WHERE e2.Id is null
),
EndDates as(
Select e1.ID, e1.Code, e1.EndDate, ROW_NUMBER() OVER (Order by e1.ID asc) as RowNumber
FROM @EventLog e1
LEFT JOIN @EventLog e2 ON e1.Code = e2.Code and e2.StartDate = DATEADD(day,1,e1.EndDate)
WHERE e2.Id is null
)

Select s.ID, s.Code, s.StartDate, e.EndDate
FROM StartDates s
JOIN EndDates e ON s.Code = e.Code and s.RowNumber = e.RowNumber
Krishnaveni B
  • 103
  • 1
  • 7