0

I need to calculate intersection between trade_name column in a grls_data table and data ->> 'product_info' in fsa_raw table by substring matching.

Value count in the tables:

  1. grls: 38596
  2. grls_data: 47434
  3. fsa_raw: 651380

I have a following SQL schema:

CREATE TABLE public.grls
(
  id integer NOT NULL DEFAULT nextval('grls_id_seq'::regclass),
  use_version integer,
  CONSTRAINT grls_pkey PRIMARY KEY (id),
  CONSTRAINT grls_use_version_foreign FOREIGN KEY (use_version)
      REFERENCES public.grls_data (id) MATCH SIMPLE
      ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
  OIDS=FALSE
);

CREATE INDEX grls_use_version_index
  ON public.grls
  USING btree
  (use_version);

CREATE TABLE public.grls_data
(
  id integer NOT NULL DEFAULT nextval('grls_data_id_seq'::regclass),
  grls_id integer NOT NULL,
  trade_name text NOT NULL,
  // other columns
  CONSTRAINT grls_data_pkey PRIMARY KEY (id),
  CONSTRAINT grls_data_grls_id_foreign FOREIGN KEY (grls_id)
      REFERENCES public.grls (id) MATCH SIMPLE
      ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
  OIDS=FALSE
);

CREATE INDEX grls_data_grls_id_index
  ON public.grls_data
  USING btree
  (grls_id);


CREATE INDEX grls_data_trade_name_index
  ON public.grls_data
  USING hash
  (trade_name COLLATE pg_catalog."default");

CREATE TABLE public.fsa_raw
(
  id integer NOT NULL DEFAULT nextval('fsa_raw_id_seq'::regclass),
  data jsonb,
  CONSTRAINT fsa_raw_pkey PRIMARY KEY (id)
)
WITH (
  OIDS=FALSE
);

CREATE INDEX fsa_raw_data_idx
  ON public.fsa_raw
  USING gin
  (data jsonb_path_ops);

CREATE INDEX words_trgm_gin
  ON public.fsa_raw
  USING gin
  ((data ->> 'product_info'::text) COLLATE pg_catalog."default" gin_trgm_ops);

So, i wrote a following SQL query for test:

WITH tns AS (
   SELECT DISTINCT ('%' || trade_name || '%') AS tn FROM grls JOIN grls_data ON
   grls.use_version = grls_data.id
)
SELECT COUNT(*) FROM fsa_raw
WHERE (data ->> 'product_info') ILIKE ANY (SELECT tn FROM tns)

But my query have a really giant cost:

Aggregate (cost=169494199.21..169494199.22 rows=1 width=8)

I can't wait for weeks, months to get query results. How do i optimize my SQL-query to get results much faster?

clemens
  • 16,716
  • 11
  • 50
  • 65
Dmitry K.
  • 3,065
  • 2
  • 21
  • 32

0 Answers0