I'm attempting to convert the following SQL Server query into a GreenPlum version of the query:
INSERT INTO #TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT
pt.part_id, qf.file_id, qf.edl_desc, pt.measure_date
FROM
part pt WITH (NOLOCK)
INNER JOIN
file_model qm with (nolock) on qm.file_model_id = pt.file_model_id
INNER JOIN
file qf with (nolock) on qf.file_id = qm.file_id;
INSERT INTO @part_list (file_id, part_id, measure_date)
SELECT DISTINCT
t1.file_id, k.part_id, k.measure_date
FROM
#TMP1 t1 WITH (NOLOCK)
CROSS APPLY
(SELECT DISTINCT TOP (300)
t2.part_id, t2.measure_date
FROM
#TMP1 t2 WITH (NOLOCK)
WHERE
t1.file_id = t2.file_id and t1.location = t2.location
ORDER BY
t2.measure_date DESC) k
WHERE
t1.measure_date >= dateadd(day, 30, getdate());
The idea here being that the final table contains the most recent up to 300 parts for all parts programs that are active (ie manufactured something) in the last 30 days.
Per the answers to this question, I am aware that LATERAL JOIN would do it, except my organization is using an older version of Postgres that does not have LATERAL, so I was left with implementing the following function instead:
CREATE FUNCTION BuildActiveParts(p_day INT, p_n INT)
RETURNS SETOF RECORD --TABLE (part_id bigint,file_id int, measure_date timestamp, location varchar(255))
AS $$
DECLARE
part_active RECORD;
part_list RECORD;
BEGIN
FOR part_active IN
SELECT DISTINCT qf.file_id, qf.location
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id WHERE pt.measure_date >= current_date - p_day LOOP
FOR part_list IN
SELECT DISTINCT pt.part_id, qf.file_id, pt.measure_date, qf.location
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id WHERE qf.file_id = part_active.file_id
AND qf.location = part_active.location
ORDER BY pt.measure_date DESC LIMIT p_n LOOP
RETURN NEXT part_list;
END LOOP;
END LOOP;
END
$$ LANGUAGE plpgsql;
-- Later used in:
--Build list of all active programs in last p_day days. This temporary table is a component of a larger function that produces a table based on this and other other calculations, called daily.
-- Note: this insert yields 'function cannot execute because it accesses relation'
INSERT INTO TMP_part_list ( part_id, file_id, measure_date, location)
SELECT DISTINCT * FROM BuildActiveParts(p_day, p_n) AS active_parts (part_id int, file_id text, measure_date timestamp, location text )
;
Unfortunately, this function is used in inserts to another table (an unavoidable reality of my business requirements), so while the function returns nice happy results when run in isolation, I get a big angry function cannot execute on segment because it accesses relation
when I try to use it for its intended purpose. While I've seen suggestions to the effect of "make a VIEW instead", that's not really an option because a view resulting from the script this functionality is a part of would take too long to query.
What can I do, beyond embarking on a months-long excursion through a jungle of red tape to convince my organization to update their stuff, to resolve this?
Edit: Here are some attempts based on comments:
Attempt with function, did not work because of function cannot execute on segment because it accesses relation
:
DROP FUNCTION IF EXISTS BuildRecentParts(TEXT, TEXT, INT);
CREATE FUNCTION BuildRecentParts(file_id TEXT, location_in TEXT, p_n INT)
RETURNS SETOF RECORD --TABLE (measure_date timestamp, part_id bigint)
AS $$
DECLARE
part_list RECORD;
BEGIN
FOR part_list IN
SELECT DISTINCT pt.measure_date, pt.part_id
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
WHERE qf.file_id = file_id
AND qf.edl_desc = location_in
ORDER BY pt.measure_date DESC LIMIT p_n LOOP
RETURN NEXT part_list;
END LOOP;
END
$$ LANGUAGE plpgsql;
SELECT DISTINCT qf.file_id, qf.edl_desc, (SELECT pti.measure_date, pti.part_id FROM part pti
INNER JOIN file_model qmi on qmi.file_model_id = pti.file_model_id
INNER JOIN file qfi on qfi.file_id = qmi.file_id
WHERE qfi.file_id = qf.file_id
AND qfi.edl_desc = qf.edl_desc
ORDER BY pti.measure_date DESC LIMIT 300)
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id
WHERE pt.measure_date >= current_date - 30 ;
Attempt without function, will not work because subquery has multiple columns:
CREATE TEMPORARY TABLE TMP_TMP1 (part_id bigint, file_id varchar(255), location varchar(255), measure_date timestamp) DISTRIBUTED BY (part_id);
INSERT INTO TMP_TMP1 (part_id, file_id, location, measure_date)
SELECT DISTINCT pt.part_id, qf.file_id, qf.edl_desc, pt.measure_date
FROM part pt
INNER JOIN file_model qm on qm.file_model_id = pt.file_model_id
INNER JOIN file qf on qf.file_id = qm.file_id;
ANALYZE TMP_TMP1;
SELECT DISTINCT t1.file_id, t1.location, (SELECT t2.measure_date, t2.part_id FROM TMP_TMP1 t2
WHERE t2.file_id = t1.file_id
AND t2.location = t1.location
ORDER BY t2.measure_date DESC LIMIT 300)
FROM TMP_TMP1 t1
WHERE t1.measure_date >= current_date - 30;
I also attempted a recursive CTE, but found that that was unsupported.