I'm using django's ORM to run a query that does some aggregation but I need to filter on parameters from other tables first. The problem is that one of my table is quite large with 400M rows and the joins are taking forever. My DB is under heavy load while it is running and it runs for hours. I have to optimise this query by doing some raw sql instead of using the ORM but I'd like help on how to fix the huge joining issue.
Here is my query. The large table is keyword_tracker_keywordranking
, the rest is about average.
SELECT DISTINCT
"kr"."offer_id",
"kr"."keyword_id",
"kr"."page_number",
COUNT(*) AS "tally",
AVG("kr"."ranking") AS "avg_rank",
"o"."brand" AS "brand",
("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date AS "date",
"kr"."search_result_kind" AS "kind"
FROM "keyword_tracker_keywordranking" "kr"
INNER JOIN "keyword_tracker_keyword" "k" ON ("kr"."keyword_id" = "k"."id")
INNER JOIN "keyword_tracker_keywordsettings" "ks" ON ("k"."id" = "ks"."keyword_id")
INNER JOIN "account_account" ON ("ks"."account_id" = "account_account"."id")
INNER JOIN "offers_boooffer" "o" ON ("kr"."offer_id" = "o"."id")
WHERE ("kr"."datetime" >= '2019-08-21' AND "kr"."datetime" < '2019-08-22' AND "account_account"."client" = 'client')
GROUP BY
"kr"."offer_id",
"kr"."keyword_id",
"kr"."search_result_kind",
"kr"."page_number",
"o"."brand",
("kr"."datetime" AT TIME ZONE 'Europe/Paris')::date
And here is the explain. We can see that it spends forever doing a nested loop :
[
{
"Plan": {
"Node Type": "Unique",
"Parallel Aware": false,
"Startup Cost": 4983963.55,
"Total Cost": 4983963.58,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.511,
"Actual Total Time": 411341.511,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.55,
"Total Cost": 4983963.56,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.51,
"Actual Total Time": 411341.51,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Sort Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"(count(*))",
"(avg(kr.ranking))",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Sort Method": "quicksort",
"Sort Space Used": 25,
"Sort Space Type": "Memory",
"Plans": [
{
"Node Type": "Aggregate",
"Strategy": "Sorted",
"Partial Mode": "Simple",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.5,
"Total Cost": 4983963.54,
"Plan Rows": 1,
"Plan Width": 72,
"Actual Startup Time": 411341.463,
"Actual Total Time": 411341.463,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"count(*)",
"avg(kr.ranking)",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind"
],
"Group Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.search_result_kind",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)"
],
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 4983963.5,
"Total Cost": 4983963.51,
"Plan Rows": 1,
"Plan Width": 36,
"Actual Startup Time": 411341.461,
"Actual Total Time": 411341.461,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)",
"kr.search_result_kind",
"kr.ranking"
],
"Sort Key": [
"kr.offer_id",
"kr.keyword_id",
"kr.search_result_kind",
"kr.page_number",
"o.brand",
"((timezone('Europe/Paris'::text, kr.datetime))::date)"
],
"Sort Method": "quicksort",
"Sort Space Used": 25,
"Sort Space Type": "Memory",
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.85,
"Total Cost": 4983963.49,
"Plan Rows": 1,
"Plan Width": 36,
"Actual Startup Time": 411341.455,
"Actual Total Time": 411341.455,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"o.brand",
"(timezone('Europe/Paris'::text, kr.datetime))::date",
"kr.search_result_kind",
"kr.ranking"
],
"Inner Unique": true,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.42,
"Total Cost": 4983960.83,
"Plan Rows": 1,
"Plan Width": 31,
"Actual Startup Time": 411341.455,
"Actual Total Time": 411341.455,
"Actual Rows": 0,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind"
],
"Inner Unique": true,
"Join Filter": "(ks.account_id = account_account.id)",
"Rows Removed by Join Filter": 0,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1.42,
"Total Cost": 4983958.91,
"Plan Rows": 1,
"Plan Width": 35,
"Actual Startup Time": 286863.945,
"Actual Total Time": 382461.602,
"Actual Rows": 4879993,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind",
"ks.account_id"
],
"Inner Unique": false,
"Join Filter": "(kr.keyword_id = ks.keyword_id)",
"Rows Removed by Join Filter": 0,
"Plans": [
{
"Node Type": "Nested Loop",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 1,
"Total Cost": 4983957.47,
"Plan Rows": 1,
"Plan Width": 35,
"Actual Startup Time": 286841.365,
"Actual Total Time": 373503.135,
"Actual Rows": 4272224,
"Actual Loops": 1,
"Output": [
"kr.offer_id",
"kr.keyword_id",
"kr.page_number",
"kr.ranking",
"kr.datetime",
"kr.search_result_kind",
"k.id"
],
"Inner Unique": true,
"Plans": [
{
"Node Type": "Index Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keywordranking_id_datetime_idx",
"Relation Name": "keyword_tracker_keywordranking",
"Schema": "public",
"Alias": "kr",
"Startup Cost": 0.57,
"Total Cost": 4983954.82,
"Plan Rows": 1,
"Plan Width": 31,
"Actual Startup Time": 286821.372,
"Actual Total Time": 325753.923,
"Actual Rows": 4272224,
"Actual Loops": 1,
"Output": [
"kr.id",
"kr.page_url",
"kr.marketplace",
"kr.page_number",
"kr.ranking",
"kr.product_title",
"kr.brand",
"kr.image_url",
"kr.nb_comments",
"kr.prime",
"kr.rating",
"kr.keyword_id",
"kr.offer_id",
"kr.asin",
"kr.nb_results",
"kr.parent_offer_id",
"kr.amazon_choice_id",
"kr.bestseller_id",
"kr.price",
"kr.is_add_on",
"kr.datetime",
"kr.search_result_kind",
"kr.created",
"kr.modified"
],
"Index Cond": "((kr.datetime >= '2019-08-21 00:00:00+00'::timestamp with time zone) AND (kr.datetime < '2019-08-22 00:00:00+00'::timestamp with time zone))",
"Rows Removed by Index Recheck": 0
},
{
"Node Type": "Index Only Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keyword_pkey",
"Relation Name": "keyword_tracker_keyword",
"Schema": "public",
"Alias": "k",
"Startup Cost": 0.43,
"Total Cost": 2.65,
"Plan Rows": 1,
"Plan Width": 4,
"Actual Startup Time": 0.011,
"Actual Total Time": 0.011,
"Actual Rows": 1,
"Actual Loops": 4272224,
"Output": [
"k.id"
],
"Index Cond": "(k.id = kr.keyword_id)",
"Rows Removed by Index Recheck": 0,
"Heap Fetches": 27999076
}
]
},
{
"Node Type": "Index Only Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "keyword_tracker_keywords_keyword_id_account_id_2d8e95e8_uniq",
"Relation Name": "keyword_tracker_keywordsettings",
"Schema": "public",
"Alias": "ks",
"Startup Cost": 0.42,
"Total Cost": 1.43,
"Plan Rows": 1,
"Plan Width": 8,
"Actual Startup Time": 0.002,
"Actual Total Time": 0.002,
"Actual Rows": 1,
"Actual Loops": 4272224,
"Output": [
"ks.keyword_id",
"ks.account_id"
],
"Index Cond": "(ks.keyword_id = k.id)",
"Rows Removed by Index Recheck": 0,
"Heap Fetches": 742872
}
]
},
{
"Node Type": "Seq Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Relation Name": "account_account",
"Schema": "public",
"Alias": "account_account",
"Startup Cost": 0,
"Total Cost": 1.91,
"Plan Rows": 1,
"Plan Width": 4,
"Actual Startup Time": 0.005,
"Actual Total Time": 0.005,
"Actual Rows": 0,
"Actual Loops": 4879993,
"Output": [
"account_account.id",
"account_account.name",
"account_account.client",
"account_account.created_on",
"account_account.scraping_frequency",
"account_account.created",
"account_account.modified"
],
"Filter": "((account_account.client)::text = 'client'::text)",
"Rows Removed by Filter": 75
}
]
},
{
"Node Type": "Index Scan",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "offers_boooffer_pkey",
"Relation Name": "offers_boooffer",
"Schema": "public",
"Alias": "o",
"Startup Cost": 0.44,
"Total Cost": 2.66,
"Plan Rows": 1,
"Plan Width": 13,
"Actual Startup Time": 0,
"Actual Total Time": 0,
"Actual Rows": 0,
"Actual Loops": 0,
"Output": [
"o.brand",
"o.id"
],
"Index Cond": "(o.id = kr.offer_id)",
"Rows Removed by Index Recheck": 0
}
]
}
]
}
]
}
]
}
]
},
"Triggers": []
}
]