0

I am trying to count orders over a 24 hours sliding window. I have a 'detetime' field and I'm calculating the 24 hours window aggregating at the minute level. It should re-start counting every time the order time between two consecutive orders is over 1440 minutes or when the running time of consecutive orders is over 1440 minutes.


Environment is SQL server 2016, I can create Temp tables but no physical tables and no memory-optimized objects (I guess anything working on 2012+ should work).
I tried an inner join on the same table and tested with recursive CTEs, ROW_NUMBER etc. but the issue is that there is never a set number of rows for the 24 hours window and the base time from which to calculate the start of the period changes. The only constant I have is the 24 hours time span. Tried the following:
https://www.red-gate.com/simple-talk/sql/t-sql-programming/calculating-values-within-a-rolling-window-in-transact-sql/
Calculate running total / running balance

Cross apply seems to be working for the most part but in some instances - when calculating the running 24 hours window - it isn't. I tried changing the datetime conditions in the WHERE clause in many ways but I still can't figure out how to get it to work correctly.
I thought about creating a reset event at the 24 hours mark as showed here https://blog.jooq.org/2015/05/12/use-this-neat-window-function-trick-to-calculate-time-differences-in-a-time-series/ but at this point my brain is melting and I can't even get the logic straight.

DROP TABLE IF EXISTS #Data

CREATE TABLE #Data
(
    START_TIME          DATETIME 
    ,ORDER_ID           NUMERIC(18,0)
    ,PROD_ID            NUMERIC(18,0)
    ,ACC_ID             NUMERIC(18,0)
);

INSERT INTO #Data
SELECT '2018-06-22 11:00:00.000', 198151606, 58666, 1601554883
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873061,58666,1601554883
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269222,58666,1601554883
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735393,58666,1601554883
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,58666,1601554883
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467555,58666,2300231016
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583916,58666,2300231016
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154497,58666,2300231016
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154818,58666,2300231016
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057919,58666,2300231016
UNION ALL SELECT '2018-07-05 02:09:00.000',1200205808,58666,2300231016
UNION ALL SELECT '2018-07-05 04:15:00.000',2200205814,58666,2300231016
UNION ALL SELECT '2018-07-05 17:23:00.000',3200370070,58666,2300231016
UNION ALL SELECT '2018-07-05 18:07:00.000',4200370093,58666,2300231016
UNION ALL SELECT '2018-07-06 20:15:00.000',5200571962,58666,2300231016
UNION ALL SELECT '2018-07-07 07:45:00.000',6200571987,58666,2300231016
UNION ALL SELECT '2018-07-07 12:13:00.000',7200571993,58666,2300231016
UNION ALL SELECT '2018-07-09 18:29:00.000',8200939551,58666,2300231016
UNION ALL SELECT '2018-07-09 21:05:00.000',9200939552,58666,2300231016
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107311,58666,2300231016
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016382,58669,2300231016
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181363,58669,2300231016
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181374,58669,2300231016
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583925,58669,2300231016
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583946,58669,2300231016
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231747,58669,2300231016
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293678,58669,2300231016
UNION ALL SELECT '2018-07-03 14:22:00.000',200029380,58669,2300231016
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057789,58669,2300231016
UNION ALL SELECT '2018-07-04 21:00:00.000',1200205781,58669,2300231016
UNION ALL SELECT '2018-07-05 15:12:00.000',2200254833,58669,2300231016
UNION ALL SELECT '2018-07-05 17:52:00.000',3200370071,58669,2300231016
UNION ALL SELECT '2018-07-09 22:30:00.000',4200939553,58669,2300231016
UNION ALL SELECT '2018-07-09 23:23:00.000',5200939566,58669,2300231016
UNION ALL SELECT '2018-07-30 17:45:00.000',6204364207,58666,2300231016
UNION ALL SELECT '2018-07-30 23:30:00.000',7204364211,58666,2300231016


;WITH TimeBetween AS(
SELECT  
    ACC_ID
    ,PROD_ID
    ,ORDER_ID
    ,START_TIME
    ,TIME_BETWEEN_ORDERS            = COALESCE(CASE WHEN DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
                                                                                ORDER BY START_TIME), START_TIME) >= 1440
                                            THEN 0 
                                            ELSE DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
                                                                                        ORDER BY START_TIME), START_TIME)
                                        END, 0)

FROM #Data
)

SELECT 
    TimeBetween.ACC_ID
    ,TimeBetween.PROD_ID
    ,TimeBetween.ORDER_ID
    ,TimeBetween.START_TIME
    ,TIME_BETWEEN_ORDERS

--Not working correctly, repeats the previous time at the end of the window when it should be 0.
    ,RUNNING_TIME_BETWEEN_ORDERS        = SUM(TIME_BETWEEN_ORDERS) OVER(PARTITION BY ACC_ID, PROD_ID ORDER BY START_TIME)

    ,Running24h.*

FROM TimeBetween

    CROSS APPLY(SELECT TOP 1
                    RUNNING_COUNT_24h                           = COUNT(*) OVER()  --Count admin units within the time window in the WHERE clause

            --Check what APPLY is returning for running time
                    ,RUNNING_TIME_BETWEEN_ORDERS_Apply          = DATEDIFF(MINUTE, StageBaseApply.START_TIME, TimeBetween.START_TIME)


             --Check what APPLY is using as base event anchor for the calculation   
                    ,START_TIME_Apply                           = StageBaseApply.START_TIME

                FROM #Data  AS StageBaseApply

                WHERE 
                    StageBaseApply.ACC_ID = TimeBetween.ACC_ID
                    AND StageBaseApply.PROD_ID = TimeBetween.PROD_ID
                    AND (StageBaseApply.START_TIME > DATEADD(MINUTE, -1440, TimeBetween.START_TIME) 
                            AND StageBaseApply.START_TIME <= TimeBetween.START_TIME
                            )

                ORDER BY StageBaseApply.START_TIME
                ) AS Running24h


ORDER BY ACC_ID,PROD_ID, START_TIME

When the running time between orders is over 24 hours the running count should re-start from 1. Currently it repeats the last value and the time it's using for the calculation seems to be off.

Current result from CROSS APPLY with notes on where it's not working and what it should be for what I'm trying to achieve

Marc0
  • 181
  • 7

2 Answers2

0

First create a Numbers table with at least as many rows as the minutes in the maximum time range you will ever be dealing with

CREATE TABLE dbo.Numbers(Number INT PRIMARY KEY);

WITH E1(N) AS 
(
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)                                       -- 1*10^1 or 10 rows
, E2(N) AS (SELECT 1 FROM E1 a, E1 b)   -- 1*10^2 or 100 rows
, E4(N) AS (SELECT 1 FROM E2 a, E2 b)   -- 1*10^4 or 10,000 rows
, E8(N) AS (SELECT 1 FROM E4 a, E4 b)   -- 1*10^8 or 100,000,000 rows
, Nums AS (SELECT TOP (10000000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM E8)
INSERT INTO dbo.Numbers
SELECT N
FROM Nums 

And then you should be able to use something like this (I'm assuming that all start times are exact minutes and there are no duplicates per ACC_ID,PROD_ID,START_TIME as shown in your example data, if there are you will need to pre-aggregate at the minute level before participating in the left join)

WITH G
     AS (SELECT ACC_ID,
                PROD_ID,
                MIN = MIN(START_TIME),
                MAX = MAX(START_TIME),
                Range = DATEDIFF(MINUTE, MIN(START_TIME), MAX(START_TIME))
         FROM   #Data
         GROUP  BY ACC_ID,
                   PROD_ID),
     E
     AS (SELECT *
         FROM   G
                JOIN dbo.Numbers N
                  ON N.Number <= Range + 1),
   R AS (SELECT    E.ACC_ID,
                  E.PROD_ID,
                  D.START_TIME,
                  Cnt = COUNT(D.START_TIME) OVER (PARTITION BY E.ACC_ID, E.PROD_ID 
                                                      ORDER BY DATEADD(MINUTE, NUMBER-1, MIN) 
                                                  ROWS BETWEEN 1439 PRECEDING AND CURRENT ROW)
        FROM      E
        LEFT JOIN #Data D
        ON        D.ACC_ID = E.ACC_ID
                  AND D.PROD_ID = E.PROD_ID
                  AND D.START_TIME = DATEADD(MINUTE, NUMBER-1, MIN) )
SELECT   *
FROM     R
WHERE    START_TIME IS NOT NULL
ORDER BY ACC_ID,
         PROD_ID,
         START_TIME
Martin Smith
  • 438,706
  • 87
  • 741
  • 845
  • Thank you for this solution but I don't think it's doing what I need. The times are exact minutes and there are no duplicates per `ACC_ID,PROD_ID,START_TIME`. However the result set I get with the numbers table is only 8 rows. I added a new screenshot with the results I'm trying to achieve. There are two highlighted instances where it doesn't work and this is where I'm struggling. – Marc0 Dec 24 '18 at 21:03
0

After finding this post on how to reset a running sum, I think I may have finally been able to crack this nut. Not sure about how well it scales but it is working.

I also added a new column for order quantity since it may be useful sometimes to track the orders running total during the same time window.

The sliding time window can be set in this CASE statement:
CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG END

DROP TABLE IF EXISTS #Data

CREATE TABLE #Data
(
    ORDER_TIME          DATETIME 
    ,ORDER_ID           NUMERIC(18,0)
    ,PROD_ID            NUMERIC(18,0)
    ,ACCOUNT_ID         NUMERIC(18,0)
    ,ORDER_QUANTITY     INT
);

INSERT INTO #Data
SELECT '2018-06-22 11:00:00.000', 1981516061, 158666, 1601554883,5
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873062,158666,1601554883,3
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269223,158666,1601554883,2
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735394,158666,1601554883,4
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,158666,1601554883,7
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467553,158666,2300231016,6
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583913,158666,2300231016,6
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154492,158666,2300231016,4
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154814,158666,2300231016,5
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057915,158666,2300231016,4
UNION ALL SELECT '2018-07-05 02:09:00.000',2002058086,158666,2300231016,4
UNION ALL SELECT '2018-07-05 04:15:00.000',2002058147,158666,2300231016,3
UNION ALL SELECT '2018-07-05 17:23:00.000',2003700706,158666,2300231016,2
UNION ALL SELECT '2018-07-05 18:07:00.000',2003700938,158666,2300231016,1
UNION ALL SELECT '2018-07-06 20:15:00.000',2005719626,158666,2300231016,7
UNION ALL SELECT '2018-07-07 07:45:00.000',2005719879,158666,2300231016,8
UNION ALL SELECT '2018-07-07 12:13:00.000',2005719931,158666,2300231016,9
UNION ALL SELECT '2018-07-09 18:29:00.000',2009395510,158666,2300231016,8
UNION ALL SELECT '2018-07-09 21:05:00.000',2009395523,158666,2300231016,6
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107312,158666,2300231016,5
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016381,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181365,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181376,258669,2300231016,3
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583923,258669,2300231016,9
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583943,258669,2300231016,2
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231742,258669,2300231016,1
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293679,258669,2300231016,1
UNION ALL SELECT '2018-07-03 14:22:00.000',2000293804,258669,2300231016,3
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057785,258669,2300231016,2
UNION ALL SELECT '2018-07-04 21:00:00.000',2002057813,258669,2300231016,1
UNION ALL SELECT '2018-07-05 15:12:00.000',2002548332,258669,2300231016,7
UNION ALL SELECT '2018-07-05 17:52:00.000',2003700719,258669,2300231016,6
UNION ALL SELECT '2018-07-09 22:30:00.000',2009395530,258669,2300231016,5
UNION ALL SELECT '2018-07-09 23:23:00.000',2009395666,258669,2300231016,3
UNION ALL SELECT '2018-07-30 17:45:00.000',2043642075,158666,2300231016,2
UNION ALL SELECT '2018-07-30 23:30:00.000',2043642114,158666,2300231016,4




;WITH NextEventLag AS(
--Returns the next event information.
SELECT 
    ORDER_TIME  
    ,ORDER_ID
    ,PROD_ID    
    ,ACCOUNT_ID 
    ,RowNum                     = ROW_NUMBER() OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME)

--NEXT_ORDER_TIME_LAG: Returns the time difference between two consecutive order times.
    ,NEXT_ORDER_TIME_LAG        = DATEDIFF(MINUTE, LAG(ORDER_TIME, 1, ORDER_TIME) OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME), ORDER_TIME)

    ,ORDER_QUANTITY

FROM #Data
)

,RunningOrders AS(
SELECT 
    RowNum
    ,ORDER_TIME
    ,ACCOUNT_ID
    ,PROD_ID
    ,NEXT_ORDER_TIME_LAG
    ,LAG_LESS_THAN_24h              = 0
    ,ORDER_QUANTITY

FROM NextEventLag

WHERE RowNum = 1


UNION ALL

SELECT 
    NextEventLag.RowNum
    ,NextEventLag.ORDER_TIME
    ,NextEventLag.ACCOUNT_ID
    ,NextEventLag.PROD_ID
    ,NextEventLag.NEXT_ORDER_TIME_LAG

--If the time lag between consecutive events and the time running sum is over 1440 minutes then set the value to 0. 
--Change the NEXT_ORDER_TIME_LAG time interval to the desired interval value in minutes.
    ,LAG_LESS_THAN_24h              = CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 
                                       ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
                                      END
    ,NextEventLag.ORDER_QUANTITY

FROM RunningOrders
    INNER JOIN NextEventLag             ON RunningOrders.RowNum + 1 = NextEventLag.RowNum
                                        AND RunningOrders.ACCOUNT_ID = NextEventLag.ACCOUNT_ID
                                        AND RunningOrders.PROD_ID = NextEventLag.PROD_ID
)

,GroupedLags AS(
--This Groups together the LAG(s) less than 1440 minutes and is used by the outer query window functions
--to calculate the running aggregates. 
SELECT RunningOrders.* 
    ,Running24h.*

FROM RunningOrders

    CROSS APPLY(SELECT TOP 1
                    Groups                          = COUNT(*) OVER(ORDER BY GroupApply.LAG_LESS_THAN_24h)  --Count admin units within the time window in the WHERE clause

                FROM RunningOrders  AS GroupApply

                WHERE 
                    GroupApply.ACCOUNT_ID = RunningOrders.ACCOUNT_ID
                    AND GroupApply.PROD_ID = RunningOrders.PROD_ID
                    AND GroupApply.ORDER_TIME <= RunningOrders.ORDER_TIME

                --ORDER BY StageBaseApply.ORDER_TIME
                ) AS Running24h
)


select 
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME
    ,GroupedLags.NEXT_ORDER_TIME_LAG
    ,GroupedLags.LAG_LESS_THAN_24h
    ,RUNNING_COUNT_24h                              = ROW_NUMBER() OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME)
    ,RUNNING_SUM_24h                                = SUM(ORDER_QUANTITY) OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME) 

from GroupedLags

ORDER BY 
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME

Here is the db<>fiddle demo

Marc0
  • 181
  • 7