SQL Server 2008 update table and switch values query optimization

Question

I have a table with values

ID          Son        Father
----------- ---------- ----------
1           Mark       Gerard
2           Gerard     Ivan
3           Leo        Samuel
4           Samuel     Johan
5           Ivan       Carles

I need to change table like this:

ID          Son        Father
----------- ---------- ----------
1           Mark       Carles
2           Gerard     Carles
3           Leo        Johan
4           Samuel     Johan
5           Ivan       Carles

The goal is to find a major 'Father' and update all 'Son' records with this value. Major 'Father' can be different.

My code is next:

DECLARE @CNT INT
DECLARE @CH_1 NVARCHAR(10)
DECLARE @CH_2 NVARCHAR(10)

CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10))

INSERT INTO #PPL VALUES (1, 'Mark', 'Gerard')
INSERT INTO #PPL VALUES (2, 'Gerard', 'Ivan')
INSERT INTO #PPL VALUES (3, 'Leo', 'Samuel')
INSERT INTO #PPL VALUES (4, 'Samuel', 'Johan')
INSERT INTO #PPL VALUES (5, 'Ivan', 'Carles')

SET @I = 1
SET @CNT = (SELECT COUNT(ID) FROM #PPL)

WHILE @I <= @CNT
BEGIN
    SET @J = 1  

        WHILE @J <= @CNT
        BEGIN
            SET @CH_1 = (SELECT Son FROM #PPL WHERE ID = @J)
            SET @CH_2 = (SELECT Father FROM #PPL WHERE ID = @J)
            UPDATE #PPL SET Father = @CH_2 WHERE Father = @CH_1
            SET @J = @J + 1
        END;

    SET @I = @I + 1
END;

SELECT * FROM #PPL

DROP TABLE #PPL

This code is working correct, but for the low number of records. How this code can be optimized?

Thanks!

as an example: http://stackoverflow.com/questions/14274942/sql-server-cte-and-recursion-example — xQbert, Jun 08 '16 at 13:36

score 1 · Answer 1 · edited Jun 08 '16 at 14:15

Here is how you can do it with a recursive CTE.

CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10))

INSERT INTO #PPL VALUES (1, 'Mark', 'Gerard')
INSERT INTO #PPL VALUES (2, 'Gerard', 'Ivan')
INSERT INTO #PPL VALUES (3, 'Leo', 'Samuel')
INSERT INTO #PPL VALUES (4, 'Samuel', 'Johan')
INSERT INTO #PPL VALUES (5, 'Ivan', 'Carles')

;WITH CTE_FamilyGenealogy
AS
(
    SELECT  ID
            ,Son
            ,Father
            ,1 AS [Level]
    FROM    #PPL Ancor
    UNION ALL
    SELECT   CTE_FamilyGenealogy.ID
            ,CTE_FamilyGenealogy.Son
            ,Fathers.Father AS Father
            ,CTE_FamilyGenealogy.[Level] + 1 AS [Level]
    FROM    #PPL Fathers
    INNER JOIN CTE_FamilyGenealogy ON CTE_FamilyGenealogy.Father = Fathers.Son
),
CTE_MajorFathers
AS
(
    SELECT  ID
            ,Son
            ,Father
            ,ROW_NUMBER() OVER (PARTITION BY Son ORDER BY [Level] DESC) AS RowRank
    FROM    CTE_FamilyGenealogy
)
SELECT  ID
        ,Son
        ,Father
FROM    CTE_MajorFathers
WHERE   RowRank = 1
ORDER BY ID

The Recursive CTE CTE_FamilyGenealogy finds all the father son combination and determine the level within the family tree. The CTE_MajorFathers CTE uses ROW_NUMBER to rank the possible combinations based on the Level with in the FamilyGenealogy to determine the Major Father.

score 0 · Answer 2 · answered Jun 08 '16 at 20:36

Try following approach base on recursion (see recursive Common Table Expressions) and HIERARCHYID (SQL2008+) data type. The basic idea is to build for every row one hierarchy value starting from the "first" father :-) and ending with "last" son :-). For example: for first row (1, 'Mark', 'Gerard') this node/family tree is /5/2/1/ where /5/ is "first" father ;-) and /1/ is the "last" son. Next it convert these values to hiearchyid values and it uses GetLevel and GetAncestor methods to compute the "first" father: Father1ID: Johan or Carles.

IF OBJECT_ID('tempdb.dbo.#Results') IS NOT NULL
BEGIN
    DROP TABLE #Results;
END
CREATE TABLE #Results (ID INT NOT NULL PRIMARY KEY, Father1ID INT);

WITH CteRec
AS (
    -- It returns Father only rows
    SELECT  l1.ID, l1.Son, l1.Father, CONVERT(VARCHAR(900), '/'+LTRIM(l1.ID)+'/') AS Node -- FamilyTree
    FROM    #PPL AS l1 -- First level
    WHERE   NOT EXISTS(SELECT * FROM #PPL p WHERE p.Son = l1.Father)
    UNION ALL 
    -- It returns Son only and Son-Father rows
    SELECT  ln.ID, ln.Son, ln.Father, CONVERT(VARCHAR(900), prt.Node+LTRIM(ln.ID)+'/') AS Node -- FamilyTree
    FROM    #PPL AS ln -- Next level
    JOIN    CteRec AS prt ON prt.Son = ln.Father
)
INSERT  #Results (ID, Father1ID)
SELECT  ID, 
        Father1ID = CONVERT(INT,REPLACE(CONVERT(HIERARCHYID, Node).GetAncestor(CONVERT(HIERARCHYID, Node).GetLevel()-1).ToString(),'/',''))
FROM    CteRec;

SELECT  p.*, r.Father1ID, rp.Father AS Father1Name
FROM    #PPL p 
INNER JOIN #Results r ON p.ID = r.ID
INNER JOIN #PPL rp ON r.Father1ID = rp.ID
-- Also you ca use #Result with UPDATE statement but I would store this values within new column Father1

score 0 · Answer 3 · answered Jun 10 '16 at 20:23

Recursive CTE's are overrated =)

This simple approach will run through just as fast (on normal data), will never complain about max recursion and is easy to read. The only downside I can see is that it might go into an eternal loop when the data is corrupt.

CREATE TABLE #PPL (ID INT, Son NVARCHAR(10), Father NVARCHAR(10))

INSERT INTO #PPL VALUES (1, 'Mark', 'Gerard')
INSERT INTO #PPL VALUES (2, 'Gerard', 'Ivan')
INSERT INTO #PPL VALUES (3, 'Leo', 'Samuel')
INSERT INTO #PPL VALUES (4, 'Samuel', 'Johan')
INSERT INTO #PPL VALUES (5, 'Ivan', 'Carles')

DECLARE @rowcount int = -1
WHILE @rowcount <> 0
    BEGIN
       UPDATE upd
          SET Father = new.Father
         FROM #PPL upd
         JOIN #PPL new
           ON new.Son = upd.Father
        WHERE upd.Father <> new.Father

        SELECT @rowcount = @@ROWCOUNT
    END

SELECT * FROM #PPL

PS: it probably helps to have an index on the Son column when running on large datasets.

SQL Server 2008 update table and switch values query optimization

3 Answers3