I'm using PostgreSQL 10.6. I have several tables partitioned by day. Each day has its own data. I want to select rows from this tables within a day.
drop table IF EXISTS request;
drop table IF EXISTS request_identity;
CREATE TABLE IF NOT EXISTS request (
id bigint not null,
record_date date not null,
payload text not null
) PARTITION BY LIST (record_date);
CREATE TABLE IF NOT EXISTS request_p1 PARTITION OF request FOR VALUES IN ('2001-01-01');
CREATE TABLE IF NOT EXISTS request_p2 PARTITION OF request FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_p1_id ON request_p1 (id);
CREATE INDEX IF NOT EXISTS i_request_p2_id ON request_p2 (id);
do $$
begin
for i in 1..100000 loop
INSERT INTO request (id,record_date,payload) values (i, '2001-01-01', 'abc');
end loop;
for i in 100001..200000 loop
INSERT INTO request (id,record_date,payload) values (i, '2001-01-02', 'abc');
end loop;
end;
$$;
CREATE TABLE IF NOT EXISTS request_identity (
record_date date not null,
parent_id bigint NOT NULL,
identity_name varchar(32),
identity_value varchar(32)
) PARTITION BY LIST (record_date);
CREATE TABLE IF NOT EXISTS request_identity_p1 PARTITION OF request_identity FOR VALUES IN ('2001-01-01');
CREATE TABLE IF NOT EXISTS request_identity_p2 PARTITION OF request_identity FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_identity_p1_payload ON request_identity_p1 (identity_name, identity_value);
CREATE INDEX IF NOT EXISTS i_request_identity_p2_payload ON request_identity_p2 (identity_name, identity_value);
do $$
begin
for i in 1..100000 loop
INSERT INTO request_identity (parent_id,record_date,identity_name,identity_value) values (i, '2001-01-01', 'NAME', 'somename'||i);
end loop;
for i in 100001..200000 loop
INSERT INTO request_identity (parent_id,record_date,identity_name,identity_value) values (i, '2001-01-02', 'NAME', 'somename'||i);
end loop;
end;
$$;
analyze request;
analyze request_identity;
I make select inside 1 day and see a good request plan:
explain analyze select *
from request
where record_date between '2001-01-01' and '2001-01-01'
and exists (select * from request_identity where parent_id = id and identity_name = 'NAME' and identity_value = 'somename555' and record_date between '2001-01-01' and '2001-01-01')
limit 100;
Limit (cost=8.74..16.78 rows=1 width=16)
-> Nested Loop (cost=8.74..16.78 rows=1 width=16)
-> HashAggregate (cost=8.45..8.46 rows=1 width=8)
Group Key: request_identity_p1.parent_id
-> Append (cost=0.42..8.44 rows=1 width=8)
-> Index Scan using i_request_identity_p1_payload on request_identity_p1 (cost=0.42..8.44 rows=1 width=8)
Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename555'::text))
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))
-> Append (cost=0.29..8.32 rows=1 width=16)
-> Index Scan using i_request_p1_id on request_p1 (cost=0.29..8.32 rows=1 width=16)
Index Cond: (id = request_identity_p1.parent_id)
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))
But if I make a select for 2 days or more, then PostgreSQL first appends rows of all partitions of request_identity and all partitions of request, and then joins them. So this is the SQL that is not working as i want:
explain analyze select *
from request
where record_date between '2001-01-01' and '2001-01-02'
and exists (select * from request_identity where parent_id = id and identity_name = 'NAME' and identity_value = 'somename1777' and record_date between '2001-01-01' and '2001-01-02')
limit 100;
Limit (cost=17.19..50.21 rows=2 width=16)
-> Nested Loop (cost=17.19..50.21 rows=2 width=16)
-> Unique (cost=16.90..16.91 rows=2 width=8)
-> Sort (cost=16.90..16.90 rows=2 width=8)
Sort Key: request_identity_p1.parent_id
-> Append (cost=0.42..16.89 rows=2 width=8)
-> Index Scan using i_request_identity_p1_payload on request_identity_p1 (cost=0.42..8.44 rows=1 width=8)
Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
-> Index Scan using i_request_identity_p2_payload on request_identity_p2 (cost=0.42..8.44 rows=1 width=8)
Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
-> Append (cost=0.29..16.63 rows=2 width=16)
-> Index Scan using i_request_p1_id on request_p1 (cost=0.29..8.32 rows=1 width=16)
Index Cond: (id = request_identity_p1.parent_id)
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
-> Index Scan using i_request_p2_id on request_p2 (cost=0.29..8.32 rows=1 width=16)
Index Cond: (id = request_identity_p1.parent_id)
Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
In my case it doesn't make sense to join (with nested loops) of these appends since the consistent rows are only within 1 day partitions group.
The desired result for me is that PostgreSQL makes joins between request_p1 to request_identity_p1, and request_p2 to request_identity_p2 first and only after that is makes appends of results.
The question is:
Is there a way to perform joins between partitions separately within 1 day partitions group?
Thanks.