-1

I'm using django's ORM to run a query that does some aggregation but I need to filter on parameters from other tables first. The problem is that one of my table is quite large with 400M rows and the joins are taking forever. My DB is under heavy load while it is running and it runs for hours. I have to optimise this query by doing some raw sql instead of using the ORM but I'd like help on how to fix the huge joining issue.

Here is my query. The large table is keyword_tracker_keywordranking, the rest is about average.

SELECT DISTINCT
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."page_number",
    COUNT(*) AS "tally",
    AVG("kr"."ranking") AS "avg_rank",
    "o"."brand" AS "brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
    "kr"."search_result_kind" AS "kind"
FROM "keyword_tracker_keywordranking" "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
WHERE ("kr"."datetime" >= '2019-08-21' AND "kr"."datetime" < '2019-08-22' AND "account_account"."client" = 'client')
GROUP BY
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."search_result_kind",
    "kr"."page_number",
    "o"."brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date

And here is the explain. We can see that it spends forever doing a nested loop :

[
  {
    "Plan": {
      "Node Type": "Unique",
      "Parallel Aware": false,
      "Startup Cost": 4983963.55,
      "Total Cost": 4983963.58,
      "Plan Rows": 1,
      "Plan Width": 72,
      "Actual Startup Time": 411341.511,
      "Actual Total Time": 411341.511,
      "Actual Rows": 0,
      "Actual Loops": 1,
      "Output": [
        "kr.offer_id",
        "kr.keyword_id",
        "kr.page_number",
        "(count(*))",
        "(avg(kr.ranking))",
        "o.brand",
        "((timezone('Europe/Paris'::text, kr.datetime))::date)",
        "kr.search_result_kind"
      ],
      "Plans": [
        {
          "Node Type": "Sort",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Startup Cost": 4983963.55,
          "Total Cost": 4983963.56,
          "Plan Rows": 1,
          "Plan Width": 72,
          "Actual Startup Time": 411341.51,
          "Actual Total Time": 411341.51,
          "Actual Rows": 0,
          "Actual Loops": 1,
          "Output": [
            "kr.offer_id",
            "kr.keyword_id",
            "kr.page_number",
            "(count(*))",
            "(avg(kr.ranking))",
            "o.brand",
            "((timezone('Europe/Paris'::text, kr.datetime))::date)",
            "kr.search_result_kind"
          ],
          "Sort Key": [
            "kr.offer_id",
            "kr.keyword_id",
            "kr.page_number",
            "(count(*))",
            "(avg(kr.ranking))",
            "o.brand",
            "((timezone('Europe/Paris'::text, kr.datetime))::date)",
            "kr.search_result_kind"
          ],
          "Sort Method": "quicksort",
          "Sort Space Used": 25,
          "Sort Space Type": "Memory",
          "Plans": [
            {
              "Node Type": "Aggregate",
              "Strategy": "Sorted",
              "Partial Mode": "Simple",
              "Parent Relationship": "Outer",
              "Parallel Aware": false,
              "Startup Cost": 4983963.5,
              "Total Cost": 4983963.54,
              "Plan Rows": 1,
              "Plan Width": 72,
              "Actual Startup Time": 411341.463,
              "Actual Total Time": 411341.463,
              "Actual Rows": 0,
              "Actual Loops": 1,
              "Output": [
                "kr.offer_id",
                "kr.keyword_id",
                "kr.page_number",
                "count(*)",
                "avg(kr.ranking)",
                "o.brand",
                "((timezone('Europe/Paris'::text, kr.datetime))::date)",
                "kr.search_result_kind"
              ],
              "Group Key": [
                "kr.offer_id",
                "kr.keyword_id",
                "kr.search_result_kind",
                "kr.page_number",
                "o.brand",
                "((timezone('Europe/Paris'::text, kr.datetime))::date)"
              ],
              "Plans": [
                {
                  "Node Type": "Sort",
                  "Parent Relationship": "Outer",
                  "Parallel Aware": false,
                  "Startup Cost": 4983963.5,
                  "Total Cost": 4983963.51,
                  "Plan Rows": 1,
                  "Plan Width": 36,
                  "Actual Startup Time": 411341.461,
                  "Actual Total Time": 411341.461,
                  "Actual Rows": 0,
                  "Actual Loops": 1,
                  "Output": [
                    "kr.offer_id",
                    "kr.keyword_id",
                    "kr.page_number",
                    "o.brand",
                    "((timezone('Europe/Paris'::text, kr.datetime))::date)",
                    "kr.search_result_kind",
                    "kr.ranking"
                  ],
                  "Sort Key": [
                    "kr.offer_id",
                    "kr.keyword_id",
                    "kr.search_result_kind",
                    "kr.page_number",
                    "o.brand",
                    "((timezone('Europe/Paris'::text, kr.datetime))::date)"
                  ],
                  "Sort Method": "quicksort",
                  "Sort Space Used": 25,
                  "Sort Space Type": "Memory",
                  "Plans": [
                    {
                      "Node Type": "Nested Loop",
                      "Parent Relationship": "Outer",
                      "Parallel Aware": false,
                      "Join Type": "Inner",
                      "Startup Cost": 1.85,
                      "Total Cost": 4983963.49,
                      "Plan Rows": 1,
                      "Plan Width": 36,
                      "Actual Startup Time": 411341.455,
                      "Actual Total Time": 411341.455,
                      "Actual Rows": 0,
                      "Actual Loops": 1,
                      "Output": [
                        "kr.offer_id",
                        "kr.keyword_id",
                        "kr.page_number",
                        "o.brand",
                        "(timezone('Europe/Paris'::text, kr.datetime))::date",
                        "kr.search_result_kind",
                        "kr.ranking"
                      ],
                      "Inner Unique": true,
                      "Plans": [
                        {
                          "Node Type": "Nested Loop",
                          "Parent Relationship": "Outer",
                          "Parallel Aware": false,
                          "Join Type": "Inner",
                          "Startup Cost": 1.42,
                          "Total Cost": 4983960.83,
                          "Plan Rows": 1,
                          "Plan Width": 31,
                          "Actual Startup Time": 411341.455,
                          "Actual Total Time": 411341.455,
                          "Actual Rows": 0,
                          "Actual Loops": 1,
                          "Output": [
                            "kr.offer_id",
                            "kr.keyword_id",
                            "kr.page_number",
                            "kr.ranking",
                            "kr.datetime",
                            "kr.search_result_kind"
                          ],
                          "Inner Unique": true,
                          "Join Filter": "(ks.account_id = account_account.id)",
                          "Rows Removed by Join Filter": 0,
                          "Plans": [
                            {
                              "Node Type": "Nested Loop",
                              "Parent Relationship": "Outer",
                              "Parallel Aware": false,
                              "Join Type": "Inner",
                              "Startup Cost": 1.42,
                              "Total Cost": 4983958.91,
                              "Plan Rows": 1,
                              "Plan Width": 35,
                              "Actual Startup Time": 286863.945,
                              "Actual Total Time": 382461.602,
                              "Actual Rows": 4879993,
                              "Actual Loops": 1,
                              "Output": [
                                "kr.offer_id",
                                "kr.keyword_id",
                                "kr.page_number",
                                "kr.ranking",
                                "kr.datetime",
                                "kr.search_result_kind",
                                "ks.account_id"
                              ],
                              "Inner Unique": false,
                              "Join Filter": "(kr.keyword_id = ks.keyword_id)",
                              "Rows Removed by Join Filter": 0,
                              "Plans": [
                                {
                                  "Node Type": "Nested Loop",
                                  "Parent Relationship": "Outer",
                                  "Parallel Aware": false,
                                  "Join Type": "Inner",
                                  "Startup Cost": 1,
                                  "Total Cost": 4983957.47,
                                  "Plan Rows": 1,
                                  "Plan Width": 35,
                                  "Actual Startup Time": 286841.365,
                                  "Actual Total Time": 373503.135,
                                  "Actual Rows": 4272224,
                                  "Actual Loops": 1,
                                  "Output": [
                                    "kr.offer_id",
                                    "kr.keyword_id",
                                    "kr.page_number",
                                    "kr.ranking",
                                    "kr.datetime",
                                    "kr.search_result_kind",
                                    "k.id"
                                  ],
                                  "Inner Unique": true,
                                  "Plans": [
                                    {
                                      "Node Type": "Index Scan",
                                      "Parent Relationship": "Outer",
                                      "Parallel Aware": false,
                                      "Scan Direction": "Forward",
                                      "Index Name": "keyword_tracker_keywordranking_id_datetime_idx",
                                      "Relation Name": "keyword_tracker_keywordranking",
                                      "Schema": "public",
                                      "Alias": "kr",
                                      "Startup Cost": 0.57,
                                      "Total Cost": 4983954.82,
                                      "Plan Rows": 1,
                                      "Plan Width": 31,
                                      "Actual Startup Time": 286821.372,
                                      "Actual Total Time": 325753.923,
                                      "Actual Rows": 4272224,
                                      "Actual Loops": 1,
                                      "Output": [
                                        "kr.id",
                                        "kr.page_url",
                                        "kr.marketplace",
                                        "kr.page_number",
                                        "kr.ranking",
                                        "kr.product_title",
                                        "kr.brand",
                                        "kr.image_url",
                                        "kr.nb_comments",
                                        "kr.prime",
                                        "kr.rating",
                                        "kr.keyword_id",
                                        "kr.offer_id",
                                        "kr.asin",
                                        "kr.nb_results",
                                        "kr.parent_offer_id",
                                        "kr.amazon_choice_id",
                                        "kr.bestseller_id",
                                        "kr.price",
                                        "kr.is_add_on",
                                        "kr.datetime",
                                        "kr.search_result_kind",
                                        "kr.created",
                                        "kr.modified"
                                      ],
                                      "Index Cond": "((kr.datetime >= '2019-08-21 00:00:00+00'::timestamp with time zone) AND (kr.datetime < '2019-08-22 00:00:00+00'::timestamp with time zone))",
                                      "Rows Removed by Index Recheck": 0
                                    },
                                    {
                                      "Node Type": "Index Only Scan",
                                      "Parent Relationship": "Inner",
                                      "Parallel Aware": false,
                                      "Scan Direction": "Forward",
                                      "Index Name": "keyword_tracker_keyword_pkey",
                                      "Relation Name": "keyword_tracker_keyword",
                                      "Schema": "public",
                                      "Alias": "k",
                                      "Startup Cost": 0.43,
                                      "Total Cost": 2.65,
                                      "Plan Rows": 1,
                                      "Plan Width": 4,
                                      "Actual Startup Time": 0.011,
                                      "Actual Total Time": 0.011,
                                      "Actual Rows": 1,
                                      "Actual Loops": 4272224,
                                      "Output": [
                                        "k.id"
                                      ],
                                      "Index Cond": "(k.id = kr.keyword_id)",
                                      "Rows Removed by Index Recheck": 0,
                                      "Heap Fetches": 27999076
                                    }
                                  ]
                                },
                                {
                                  "Node Type": "Index Only Scan",
                                  "Parent Relationship": "Inner",
                                  "Parallel Aware": false,
                                  "Scan Direction": "Forward",
                                  "Index Name": "keyword_tracker_keywords_keyword_id_account_id_2d8e95e8_uniq",
                                  "Relation Name": "keyword_tracker_keywordsettings",
                                  "Schema": "public",
                                  "Alias": "ks",
                                  "Startup Cost": 0.42,
                                  "Total Cost": 1.43,
                                  "Plan Rows": 1,
                                  "Plan Width": 8,
                                  "Actual Startup Time": 0.002,
                                  "Actual Total Time": 0.002,
                                  "Actual Rows": 1,
                                  "Actual Loops": 4272224,
                                  "Output": [
                                    "ks.keyword_id",
                                    "ks.account_id"
                                  ],
                                  "Index Cond": "(ks.keyword_id = k.id)",
                                  "Rows Removed by Index Recheck": 0,
                                  "Heap Fetches": 742872
                                }
                              ]
                            },
                            {
                              "Node Type": "Seq Scan",
                              "Parent Relationship": "Inner",
                              "Parallel Aware": false,
                              "Relation Name": "account_account",
                              "Schema": "public",
                              "Alias": "account_account",
                              "Startup Cost": 0,
                              "Total Cost": 1.91,
                              "Plan Rows": 1,
                              "Plan Width": 4,
                              "Actual Startup Time": 0.005,
                              "Actual Total Time": 0.005,
                              "Actual Rows": 0,
                              "Actual Loops": 4879993,
                              "Output": [
                                "account_account.id",
                                "account_account.name",
                                "account_account.client",
                                "account_account.created_on",
                                "account_account.scraping_frequency",
                                "account_account.created",
                                "account_account.modified"
                              ],
                              "Filter": "((account_account.client)::text = 'client'::text)",
                              "Rows Removed by Filter": 75
                            }
                          ]
                        },
                        {
                          "Node Type": "Index Scan",
                          "Parent Relationship": "Inner",
                          "Parallel Aware": false,
                          "Scan Direction": "Forward",
                          "Index Name": "offers_boooffer_pkey",
                          "Relation Name": "offers_boooffer",
                          "Schema": "public",
                          "Alias": "o",
                          "Startup Cost": 0.44,
                          "Total Cost": 2.66,
                          "Plan Rows": 1,
                          "Plan Width": 13,
                          "Actual Startup Time": 0,
                          "Actual Total Time": 0,
                          "Actual Rows": 0,
                          "Actual Loops": 0,
                          "Output": [
                            "o.brand",
                            "o.id"
                          ],
                          "Index Cond": "(o.id = kr.offer_id)",
                          "Rows Removed by Index Recheck": 0
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          ]
        }
      ]
    },
    "Triggers": []
  }
]
PhilipGarnero
  • 2,399
  • 4
  • 17
  • 24
  • 1
    Please show us the execution plan generated using `explain (analyze, buffers, FORMAT TEXT)` the JSON format isn't meant for humans –  Aug 23 '19 at 10:52
  • For a large data, you could use view. Join the tables those you need to join and make a view from that output then you could use that view whenever you need it. – aknkrstozkn Aug 23 '19 at 11:01
  • Please add relevant DDL (+some info on cardinalities) to your question. – wildplasser Aug 25 '19 at 14:01

2 Answers2

0

This index should help:

CREATE INDEX ON keyword_tracker_keywordranking (id, datetime);

Your condition on datetime is strange: it selects only records WHERE datetime = '2019-08-21 00:00:00'.

The estimates seem quite off, improve them with

ANALYZE keyword_tracker_keywordranking
Laurenz Albe
  • 209,280
  • 17
  • 206
  • 263
0

From the top of my head - kr seems to be the heaviest, so I would pre-filter it:

Other step you can do is to pre-select - so for the relevant tables in the join statement - do the nested queries selecting only the columns, that you are really using (in case of wide tables, where you use only few columns).

SELECT DISTINCT
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."page_number",
    COUNT(*) AS "tally",
    AVG("kr"."ranking") AS "avg_rank",
    "o"."brand" AS "brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
    "kr"."search_result_kind" AS "kind"
FROM (select * from "keyword_tracker_keywordranking" where "datetime" = '2019-08-21') "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN (select * from "account_account" where "client" = 'client') "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
GROUP BY
    "kr"."offer_id",
    "kr"."keyword_id",
    "kr"."search_result_kind",
    "kr"."page_number",
    "o"."brand",
    ("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date

Grzegorz Skibinski
  • 12,624
  • 2
  • 11
  • 34
  • 1
    That's essentially the same as replacing `"kr"."datetime" >= '2019-08-21' AND "kr"."datetime" <= '2019-08-21'` with `"kr"."datetime" = '2019-08-21'` in the original query. The filtered sub-select won't have an influence on the execution plan - Postgres' query optimizer is smart enough for that –  Aug 23 '19 at 11:06