0

I'm having a bit of a brain lapse ironing out a query as an inner join. The reason for the shift is performance, the "IN MAX" approach is too expensive - so it needs to be reworked.

I've attempted the solution presented at Retrieving the last record in each group - MySQL, however, applying those strategies results in a data set that is divergent from the valid-but-slow query presented below.

Assuming a table with this structure and data, which is a table that stores grades for plays by user:

CREATE TABLE IF NOT EXISTS `plays` (
  `id` char(36) COLLATE utf8_unicode_ci NOT NULL COMMENT '(DC2Type:uuid)', 
  `user_id` int(10) unsigned NOT NULL, 
  `step_id` int(10) unsigned NOT NULL DEFAULT '0', 
  `time_started` datetime DEFAULT NULL COMMENT '(DC2Type:datetime_immutable)', 
  `grade` decimal(5, 2) DEFAULT '0.00', 
  PRIMARY KEY (`id`), 
  KEY `search_idx` (`user_id`, `step_id`), 
  KEY `grade_idx` (`grade`), 
  KEY `time_idx` (`time_started`), 
  KEY `user_idx` (`user_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8 COLLATE = utf8_unicode_ci;
INSERT INTO `plays` (
  `id`, `user_id`, `step_id`, `time_started`, 
  `grade`
) 
VALUES 
  (
    '05455f0f-eb7f-4563-b20b-0a78b3fb616f', 
    24, 717, '2022-03-14 19:22:51', '0.00'
  ), 
  (
    '0adf9143-07fb-4ec6-9b00-0ff58c8cf4c7', 
    24, 29, '2018-09-26 14:09:07', '0.00'
  ), 
  (
    '0b0ebedc-c640-4d97-90a6-9cad64e78f9e', 
    24, 170, '2021-01-04 19:52:42', '93.00'
  ), 
  (
    '152c76da-f3ec-44cb-8e16-b24a8f97f9e7', 
    24, 48, '2019-01-11 20:13:44', '100.00'
  ), 
  (
    '19320ca3-997f-4696-9402-d00746507c87', 
    24, 29, '2018-11-29 14:33:58', '88.89'
  ), 
  (
    '1e7a1594-5c63-48ca-9717-35f058b76c80', 
    24, 29, '2019-05-27 14:03:33', '72.73'
  ), 
  (
    '2001f8f0-d17f-4ecd-8feb-e4517cf3e708', 
    24, 32, '2018-11-29 14:32:37', '100.00'
  ), 
  (
    '287e23b3-bcdf-4a98-ac86-8aa4411d191a', 
    24, 724, '2022-02-15 19:19:12', '73.00'
  ), 
  (
    '29fbd162-57a1-47ba-befd-00d8e90a3fe7', 
    24, 519, '2021-11-04 19:33:22', '88.89'
  ), 
  (
    '2aa6d5e0-cfb5-424b-b1f9-4e142e8c9d9c', 
    24, 29, '2018-09-26 14:22:07', '0.00'
  ), 
  (
    '2ed3127a-f95b-4ed7-8072-e56b10dad864', 
    24, 729, '2022-09-13 19:29:10', '0.00'
  ), 
  (
    '2ee70c04-9785-463a-bfd0-779d45e9319f', 
    24, 724, '2022-02-15 17:47:04', '0.00'
  ), 
  (
    '30731861-0c4e-437a-a3af-0a4f228194ed', 
    24, 30, '2018-11-01 13:23:35', '0.00'
  ), 
  (
    '3419e88e-56a1-4272-91b8-fb1025d4518e', 
    24, 519, '2021-07-23 21:10:30', '0.00'
  ), 
  (
    '34888d18-95c7-44f7-a864-5f80abbacb72', 
    24, 732, '2022-03-16 18:39:30', '0.00'
  ), 
  (
    '3cec5b90-6fe5-409d-ad77-8fe705de9c24', 
    24, 162, '2019-09-22 10:41:11', '0.00'
  ), 
  (
    '3f6b8dc4-1511-439f-ac11-663b251b07e0', 
    24, 521, '2021-09-27 13:14:04', '0.00'
  ), 
  (
    '40e196a1-c48e-4196-9ebc-0b8829ec31de', 
    24, 48, '2019-01-11 19:46:17', '100.00'
  ), 
  (
    '41d6c28f-3d53-47b6-a5db-65068162d8b5', 
    24, 717, '2022-03-14 19:21:50', '0.00'
  ), 
  (
    '4286296a-1b7d-4b2d-a8ce-d97b0173c9f1', 
    24, 29, '2018-09-26 13:36:07', '87.50'
  ), 
  (
    '4a0acf0a-eafd-4b09-981b-bbc35f9444d6', 
    24, 48, '2019-01-11 20:12:14', '100.00'
  ), 
  (
    '508f9528-8570-49ea-971f-2783ecda87e3', 
    24, 724, '2022-02-15 16:43:39', '0.00'
  ), 
  (
    '6130dc7b-b55d-42b9-8aa9-288c068fb5e8', 
    24, 724, '2022-02-15 16:47:39', '0.00'
  ), 
  (
    '6264137e-822c-4b4f-abc1-8d099b1bb0a5', 
    24, 29, '2018-12-03 20:37:50', '0.00'
  ), 
  (
    '636ebc8e-db11-4bc8-8605-732c88a3f209', 
    24, 724, '2022-02-15 19:09:37', '0.00'
  ), 
  (
    '6820915d-a4c4-463e-aeaa-fe9f048a2f46', 
    24, 230, '2020-02-04 00:43:29', '62.50'
  ), 
  (
    '6b8dcb63-626f-4f61-b74a-fdbe76c7af4c', 
    24, 717, '2022-03-14 19:16:52', '0.00'
  ), 
  (
    '6ba57e47-d4ca-4f21-bb12-20058f72ff11', 
    24, 742, '2022-07-06 17:43:19', '0.00'
  ), 
  (
    '7087eb2a-5477-4941-b8c0-6fc18311a672', 
    24, 724, '2022-02-15 19:13:35', '0.00'
  ), 
  (
    '7520fd2e-df77-41ff-9cb1-b3d8d0608c33', 
    24, 230, '2022-03-14 19:24:05', '37.50'
  ), 
  (
    '7ef687a8-6678-428f-b615-11f779ae155f', 
    24, 724, '2022-02-15 16:44:18', '0.00'
  ), 
  (
    '843f175e-9d5b-41e7-b32b-0448df691b73', 
    24, 211, '2021-08-12 20:59:20', '100.00'
  ), 
  (
    '8ed865f4-64eb-4150-a0ff-b885a3ae5bf3', 
    24, 567, '2021-06-04 14:08:26', '80.00'
  ), 
  (
    '947896a3-6f5c-48e8-9877-2a7c14e70ca0', 
    24, 724, '2022-02-15 20:51:11', '0.00'
  ), 
  (
    '95c647af-ff0a-495e-bf1e-78f89222306f', 
    24, 717, '2022-03-14 19:17:26', '100.00'
  ), 
  (
    '99e5777f-7f6c-493a-b635-763d5b175ac0', 
    24, 724, '2022-02-15 16:17:08', '0.00'
  ), 
  (
    '9c8ba2db-b02d-4bff-b516-bb7aaeb726d8', 
    24, 724, '2022-02-15 19:14:39', '0.00'
  ), 
  (
    'b3178efc-5823-42cf-aa07-9f5e8e2db002', 
    24, 732, '2022-03-16 18:26:30', '0.00'
  ), 
  (
    'bda4cccb-e75e-4b7c-8fe2-057b2e0a95c6', 
    24, 724, '2022-02-15 16:06:53', '0.00'
  ), 
  (
    'c234db3b-6023-4ee0-9b3f-b4745326e35f', 
    24, 211, '2020-10-07 01:59:29', '83.33'
  ), 
  (
    'cbacc24d-ea84-4e60-b1f9-6198a7b79501', 
    24, 732, '2022-03-16 18:40:40', '100.00'
  ), 
  (
    'cde6ef4a-ef3f-4fc6-8e6b-341877dd0158', 
    24, 48, '2019-01-11 19:43:09', '100.00'
  ), 
  (
    'd0461d61-1a10-4d1d-bc64-fc30517c520e', 
    24, 229, '2020-02-04 00:42:53', '0.00'
  ), 
  (
    'd225531c-dfd4-4aec-9586-36319cf4b31d', 
    24, 724, '2022-02-15 17:50:40', '0.00'
  ), 
  (
    'd95944c8-f62a-4faf-a45e-39fe31307a26', 
    24, 724, '2022-02-15 17:48:40', '0.00'
  ), 
  (
    'e1a94c22-858a-40ba-84e4-185b2efff0e4', 
    24, 732, '2022-03-16 18:27:12', '0.00'
  ), 
  (
    'e2eee6cd-e3f2-4bf8-a50c-5806389a5ffa', 
    24, 32, '2019-01-28 18:19:28', '100.00'
  ), 
  (
    'e3785d8b-23a8-4e14-97ca-dd2b443911b7', 
    24, 32, '2019-01-28 18:19:10', '0.00'
  ), 
  (
    'eee4c630-8ba0-4c12-be4b-26fdf3eb4d39', 
    24, 230, '2022-03-14 19:28:34', '87.50'
  ), 
  (
    'efc49a51-9afa-4bef-98c3-50921591f77e', 
    24, 732, '2022-03-16 18:30:47', '100.00'
  ), 
  (
    'f222aafd-4184-4e17-9cf8-75cf70810fe9', 
    24, 48, '2019-03-01 19:31:43', '0.00'
  ), 
  (
    'f7396e03-695a-4dcd-a630-629463bd8254', 
    24, 230, '2022-03-14 19:26:04', '37.50'
  ), 
  (
    'f801ae4d-5c21-42cd-b8fb-f2e262b3aa14', 
    24, 724, '2022-02-15 16:39:54', '0.00'
  ), 
  (
    'fd4acb63-17f3-4f02-803c-2de5e4907fbe', 
    24, 169, '2021-01-04 19:42:55', '100.00'
  )

The goal is to select a singular full row that contains the max grade. In other words, get the "play" that netted you your best grade for any step.

This query returns the right data set, but is not optimal; in a very large data set, it is taking upward of 3k seconds to net a result. It can be seen as a fiddle here as well: http://sqlfiddle.com/#!9/39fe80/2

SELECT 
  * 
FROM 
  plays p 
WHERE 
  p.user_id = 24 
  AND p.time_started BETWEEN '2020-08-01' 
  AND '2022-09-27' 
  AND (
    p.grade IN (
      SELECT 
        MAX(p2.grade) 
      FROM 
        plays p2 
      WHERE 
        p.user_id = p2.user_id 
        AND p.step_id = p2.step_id 
        AND (
          p2.time_started BETWEEN '2020-08-01' 
          AND '2022-09-27'
        )
    )
  ) 
GROUP BY 
  p.user_id, 
  p.step_id 
ORDER BY 
  p.time_started ASC

I've been trying to rewrite it as an inner join, http://sqlfiddle.com/#!9/39fe80/4, but am having trouble netting the matched result set.

SELECT 
  * 
FROM 
  plays p 
  INNER JOIN (
    SELECT 
      p2.id, 
      MAX(p2.grade) grade 
    FROM 
      plays p2 
    WHERE 
      p2.time_started BETWEEN '2020-08-01' 
      AND '2022-09-27' 
    GROUP BY 
      p2.user_id, 
      p2.step_id
  ) X ON X.id = p.id
ORDER BY 
  p.time_started

Any guidance you can offer would be appreciated!

Saeven
  • 2,280
  • 1
  • 20
  • 33
  • `X.id = p.id` should be `X.id = p.id AND x.grade = p.grade` – Barmar Sep 29 '22 at 20:58
  • Thanks @barmar if I do this, however, it removes necessary results. If you compare, you will see that step 732 disappears if I make this change. – Saeven Sep 29 '22 at 21:08
  • 1
    You should be selecting the same columns you group by, and comparing those columns. So `select p2.user_id, p2.step_id, MAX(p2.grade) grade` and then `ON x.user_id = p.user_id AND x.step_id = p.step_id AND x.grade = p.grade` – Barmar Sep 29 '22 at 22:13
  • Thanks for the added help, @Barmar. A fiddle applying that strategy brings us a bit closer, but yields duplicates in the result set http://sqlfiddle.com/#!9/39fe80/13 – Saeven Sep 29 '22 at 23:21
  • Can you add the expected result to the question? – Barmar Sep 29 '22 at 23:25
  • If the user has multiple rows with the max grade, you'll get all of them. You need some way to decide between them, such as the maximum `time_started`. – Barmar Sep 29 '22 at 23:26
  • "Groupwise-max", though possible, is clumsy in SQL. – Rick James Sep 30 '22 at 16:06

0 Answers0