SQL Server: stored procedure using recursive CTE finding values matching a total

Question

I need to find within a stored procedure which values match a wanted total following valex's solution recursive query in SQL Server

The following works pretty well assuming the CTE anchor recordset is very small

CREATE TABLE #t ([id] INT, [num] FLOAT);

DECLARE @wanted FLOAT = 100000

INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
       (6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
       (11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
       (16, 10000), (17, 74000)

 /* when you add more records the query becomes too slow, remove this comment
    to test*/
 /*,(18,10000),(19,78000),(20,10000),(21,10000),(22,80000),(23,19000), 
 (24,8000),(25,5000),(26,10000),(27,4000),(28,46000),(29,48000),(30,20000),
 (31,10000),(32,25000),(33,10000),(34,13000),(35,16000),(36,10000),
 (37,5000), 38,5000),(39,30000),(40,15000),(41,10000)*/
;

CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);

WITH CTE AS
(
    SELECT 
        id, num AS CSum, 
        CAST(id AS VARCHAR(MAX)) AS path 
    FROM 
        #t
     WHERE num <= @wanted

    UNION ALL

    SELECT 
        #t.id, #t.num + CTE.CSum AS CSum, 
        CTE.path + ',' + CAST(#t.id AS VARCHAR(MAX)) AS path
    FROM
        #T 
    INNER JOIN 
        CTE ON #T.num + CTE.CSum <= @wanted AND CTE.id < #T.id
    WHERE
        #T.num + CTE.CSum <= @wanted 
)
SELECT TOP 1 Path
FROM CTE  
WHERE CTE.CSum = @wanted 
ORDER BY id

DROP TABLE #t

It will return 3,4 which are the first 2 rows whose [num] values gives the @wanted total.

This works reasonably fast when there are just a few records in the temp table #t but when you remove the comment and all remaining records (from id 17 to id 41) the query just takes forever because the CTE grows exponentially.

Is there a way to speed up the code? i just need the first matching total (the list anchor dataset is ordered so a result like 3,4 is better than 8,20,22)

Please explain what you are trying to do *in this question*. Cross references are not only hard to follow. The link could disappear or be edited and no longer relevant to this question. — Gordon Linoff, Oct 17 '18 at 20:10
You need the "first matching total"... Do you mean two rows, or can be three, four, more? Do they need to be consecutive, or they can be any rows? — The Impaler, Oct 17 '18 at 20:14
You have ID indexed, but also have predicates on Num as well as ID - I would through in an immediate guess around making that NC index a covering index and add num as one of the indexed columns. A query plan would help here though — Andrew, Oct 17 '18 at 20:32
@Gordon Linoff sorry for the cross reference i wanted to credit valex for the code. I want to find which records can be picked up to match a given total it can be 2 up to 100 rows for a given total — Wushel, Oct 17 '18 at 20:35
@The Impaler there could be more than a combination in the anchor dataset which lead to the wanted total, it would be better if i can select the ones with the lowest id , but if it improves performances any record would be fine assuming they can add up to the wanted total — Wushel, Oct 17 '18 at 20:39
if the provided example i'm looking for any records where the sum of [num] gives 100000 — Wushel, Oct 17 '18 at 20:45
As I see it, the number of possible combination grows exponentially for every row you add to the base table. If a CTE could peek for a "stop condition" then it you could use it to cut the exploration of the rest of candidates, once a solution is found. — The Impaler, Oct 17 '18 at 20:56
Precisely that's what i want, i wouldn't need to do any further exploration, one sequence would be enough they problem is i don't know how to stop the recursion prematurely — Wushel, Oct 17 '18 at 21:02

score 0 · Accepted Answer · answered Oct 17 '18 at 21:36

What if you took an iterative approach? This would be pretty simple to give the ability to stop as soon as a solution is found.

This was put together quickly, so you may can optimize further. I tested for your example (ran in less than 1 second) and several other combinations and levels of depth.

Result Depth       Total       IdList     NumList
------ ----------- ----------- ---------- -------------
Found  1           100000      3,4        53000,47000

Full Code:

-- Configuration
DECLARE @wanted FLOAT = 100000
DECLARE @MaxDepth INT = 10 -- Customize how many levels you want to look
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
IF OBJECT_ID('tempdb..#T') IS NULL BEGIN
    CREATE TABLE #T (Id INT, Num INT)
    INSERT INTO #t ([id], [num])
    VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
        (6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
        (11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
        (16, 10000), (17, 74000)
    CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);
END

-- Setup processing table
IF OBJECT_ID('tempdb..#U') IS NOT NULL DROP TABLE #U
CREATE TABLE #U (
    MaxId INT,
    Total INT,
    IdList VARCHAR(MAX),
    NumList VARCHAR(MAX)
)

-- Initial population from source table
INSERT #U
    SELECT Id, Num,
        CONVERT(VARCHAR(10), Id), 
        CONVERT(VARCHAR(10), Num)
    FROM #T

-- Iterative approach
DECLARE @Depth INT = 0
WHILE NOT EXISTS (SELECT * FROM #U WHERE Total = @wanted) BEGIN

    -- Increment depth
    SET @Depth = @Depth + 1
    IF @Depth >= @MaxDepth BEGIN
        PRINT 'Max depth reached'
        RETURN -- Stop processing further
    END

    -- Calculate sum for this depth
    IF OBJECT_ID('tempdb..#V') IS NOT NULL
        DROP TABLE #V
    SELECT
        T.Id AS MaxId,
        U.Total + T.Num AS Total,
        U.IdList + ',' + CONVERT(VARCHAR(10), T.Id) AS IdList,
        U.NumList + ',' + CONVERT(VARCHAR(10), T.Num) AS NumList
        INTO #V
    FROM #U U
        INNER JOIN #T T
            ON U.MaxId < T.Id

    -- Replace data for next iteration
    TRUNCATE TABLE #U
    INSERT #U
        SELECT * FROM #V

    -- Check if no more combinations available
    IF @@ROWCOUNT = 0 BEGIN
        PRINT 'All combinations tested'
        RETURN -- Stop processing further
    END

END

-- Return result
SELECT TOP 1 'Found' AS [Result], @Depth AS Depth, Total, IdList, NumList FROM #U WHERE Total = @wanted

That's it :-) very nice solution indeed it's way faster than the recursive approach and works with the full size recordset — Wushel, Oct 17 '18 at 21:54

SQL Server: stored procedure using recursive CTE finding values matching a total

1 Answers1