Background
I'm running Postgres 11 on CentOS 7.
I recently learned the basics of recursive CTEs in Postgres thanks to S-Man's answer to my recent question.
The problem
While working on a closely related issue (counting parts sold within bundles and assemblies) and using this recursive CTE, I ran into a problem where the query looped indefinitely and never completed.
I tracked this down to the presence of non-spurious 'self-referential' entries in the relator
table, i.e. rows with the same value for parent_name
and child_name
.
I know that these are the source of the problem because when I recreated the situation with test tables and data, the undesired looping behavior occurred when these rows were present, and disappeared when these rows were absent or when UNION
(which excludes duplicate returned rows) was used in the CTE rather than UNION ALL
.
I think the data model itself probably needs adjusting so that these 'self-referential' rows aren't necessary, but for now, what I need to do is get this query to return the desired data on completion and stop looping.
How can I achieve this result? All guidance much appreciated!
Tables and test data
CREATE TABLE the_schema.names_categories (
id INTEGER NOT NULL PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
created_at TIMESTAMPTZ DEFAULT now(),
thing_name TEXT NOT NULL,
thing_category TEXT NOT NULL
);
CREATE TABLE the_schema.relator (
id INTEGER NOT NULL PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
created_at TIMESTAMPTZ DEFAULT now(),
parent_name TEXT NOT NULL,
child_name TEXT NOT NULL,
child_quantity INTEGER NOT NULL
);
/* NOTE: listing_name below is like an alias of a relator.parent_name as it appears in a catalog,
required to know because it is these listing_names that are reflected by sales.sold_name */
CREATE TABLE the_schema.catalog_listings (
id INTEGER NOT NULL PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
created_at TIMESTAMPTZ DEFAULT now(),
listing_name TEXT NOT NULL,
parent_name TEXT NOT NULL
);
CREATE TABLE the_schema.sales (
id INTEGER NOT NULL PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
created_at TIMESTAMPTZ DEFAULT now(),
sold_name TEXT NOT NULL,
sold_quantity INTEGER NOT NULL
);
CREATE VIEW the_schema.relationships_with_child_category AS (
SELECT
c.listing_name,
r.parent_name,
r.child_name,
r.child_quantity,
n.thing_category AS child_category
FROM
the_schema.catalog_listings c
INNER JOIN
the_schema.relator r
ON c.parent_name = r.parent_name
INNER JOIN
the_schema.names_categories n
ON r.child_name = n.thing_name
);
INSERT INTO the_schema.names_categories (thing_name, thing_category)
VALUES ('parent1', 'bundle'), ('child1', 'assembly'), ('child2', 'assembly'),('subChild1', 'component'),
('subChild2', 'component'), ('subChild3', 'component');
INSERT INTO the_schema.catalog_listings (listing_name, parent_name)
VALUES ('listing1', 'parent1'), ('parent1', 'child1'), ('parent1','child2'), ('child1', 'child1'), ('child2', 'child2');
INSERT INTO the_schema.catalog_listings (listing_name, parent_name)
VALUES ('parent1', 'child1'), ('parent1','child2');
/* note the two 'self-referential' entries */
INSERT INTO the_schema.relator (parent_name, child_name, child_quantity)
VALUES ('parent1', 'child1', 1),('child1', 'subChild1', 1), ('child1', 'subChild2', 1)
('parent1', 'child2', 1),('child2', 'subChild1', 1), ('child2', 'subChild3', 1), ('child1', 'child1', 1), ('child2', 'child2', 1);
INSERT INTO the_schema.sales (sold_name, sold_quantity)
VALUES ('parent1', 1), ('parent1', 2), ('listing1', 1);
The present query, loops indefinitely with the required UNION ALL
WITH RECURSIVE cte AS (
SELECT
s.sold_name,
s.sold_quantity,
r.child_name,
r.child_quantity,
r.child_category as category
FROM
the_schema.sales s
JOIN the_schema.relationships_with_child_category r
ON s.sold_name = r.listing_name
UNION ALL
SELECT
cte.sold_name,
cte.sold_quantity,
r.child_name,
r.child_quantity,
r.child_category
FROM cte
JOIN the_schema.relationships_with_child_category r
ON cte.child_name = r.parent_name
)
SELECT
child_name,
SUM(sold_quantity * child_quantity)
FROM cte
WHERE category = 'component'
GROUP BY child_name
;