I need to calculate intersection between trade_name
column in a grls_data
table and data ->> 'product_info'
in fsa_raw
table by substring matching.
Value count in the tables:
grls
: 38596grls_data
: 47434fsa_raw
: 651380
I have a following SQL schema:
CREATE TABLE public.grls
(
id integer NOT NULL DEFAULT nextval('grls_id_seq'::regclass),
use_version integer,
CONSTRAINT grls_pkey PRIMARY KEY (id),
CONSTRAINT grls_use_version_foreign FOREIGN KEY (use_version)
REFERENCES public.grls_data (id) MATCH SIMPLE
ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
OIDS=FALSE
);
CREATE INDEX grls_use_version_index
ON public.grls
USING btree
(use_version);
CREATE TABLE public.grls_data
(
id integer NOT NULL DEFAULT nextval('grls_data_id_seq'::regclass),
grls_id integer NOT NULL,
trade_name text NOT NULL,
// other columns
CONSTRAINT grls_data_pkey PRIMARY KEY (id),
CONSTRAINT grls_data_grls_id_foreign FOREIGN KEY (grls_id)
REFERENCES public.grls (id) MATCH SIMPLE
ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
OIDS=FALSE
);
CREATE INDEX grls_data_grls_id_index
ON public.grls_data
USING btree
(grls_id);
CREATE INDEX grls_data_trade_name_index
ON public.grls_data
USING hash
(trade_name COLLATE pg_catalog."default");
CREATE TABLE public.fsa_raw
(
id integer NOT NULL DEFAULT nextval('fsa_raw_id_seq'::regclass),
data jsonb,
CONSTRAINT fsa_raw_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE INDEX fsa_raw_data_idx
ON public.fsa_raw
USING gin
(data jsonb_path_ops);
CREATE INDEX words_trgm_gin
ON public.fsa_raw
USING gin
((data ->> 'product_info'::text) COLLATE pg_catalog."default" gin_trgm_ops);
So, i wrote a following SQL query for test:
WITH tns AS (
SELECT DISTINCT ('%' || trade_name || '%') AS tn FROM grls JOIN grls_data ON
grls.use_version = grls_data.id
)
SELECT COUNT(*) FROM fsa_raw
WHERE (data ->> 'product_info') ILIKE ANY (SELECT tn FROM tns)
But my query have a really giant cost:
Aggregate (cost=169494199.21..169494199.22 rows=1 width=8)
I can't wait for weeks, months to get query results. How do i optimize my SQL-query to get results much faster?