I'm having a bit of a brain lapse ironing out a query as an inner join. The reason for the shift is performance, the "IN MAX" approach is too expensive - so it needs to be reworked.
I've attempted the solution presented at Retrieving the last record in each group - MySQL, however, applying those strategies results in a data set that is divergent from the valid-but-slow query presented below.
Assuming a table with this structure and data, which is a table that stores grades for plays by user:
CREATE TABLE IF NOT EXISTS `plays` (
`id` char(36) COLLATE utf8_unicode_ci NOT NULL COMMENT '(DC2Type:uuid)',
`user_id` int(10) unsigned NOT NULL,
`step_id` int(10) unsigned NOT NULL DEFAULT '0',
`time_started` datetime DEFAULT NULL COMMENT '(DC2Type:datetime_immutable)',
`grade` decimal(5, 2) DEFAULT '0.00',
PRIMARY KEY (`id`),
KEY `search_idx` (`user_id`, `step_id`),
KEY `grade_idx` (`grade`),
KEY `time_idx` (`time_started`),
KEY `user_idx` (`user_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8 COLLATE = utf8_unicode_ci;
INSERT INTO `plays` (
`id`, `user_id`, `step_id`, `time_started`,
`grade`
)
VALUES
(
'05455f0f-eb7f-4563-b20b-0a78b3fb616f',
24, 717, '2022-03-14 19:22:51', '0.00'
),
(
'0adf9143-07fb-4ec6-9b00-0ff58c8cf4c7',
24, 29, '2018-09-26 14:09:07', '0.00'
),
(
'0b0ebedc-c640-4d97-90a6-9cad64e78f9e',
24, 170, '2021-01-04 19:52:42', '93.00'
),
(
'152c76da-f3ec-44cb-8e16-b24a8f97f9e7',
24, 48, '2019-01-11 20:13:44', '100.00'
),
(
'19320ca3-997f-4696-9402-d00746507c87',
24, 29, '2018-11-29 14:33:58', '88.89'
),
(
'1e7a1594-5c63-48ca-9717-35f058b76c80',
24, 29, '2019-05-27 14:03:33', '72.73'
),
(
'2001f8f0-d17f-4ecd-8feb-e4517cf3e708',
24, 32, '2018-11-29 14:32:37', '100.00'
),
(
'287e23b3-bcdf-4a98-ac86-8aa4411d191a',
24, 724, '2022-02-15 19:19:12', '73.00'
),
(
'29fbd162-57a1-47ba-befd-00d8e90a3fe7',
24, 519, '2021-11-04 19:33:22', '88.89'
),
(
'2aa6d5e0-cfb5-424b-b1f9-4e142e8c9d9c',
24, 29, '2018-09-26 14:22:07', '0.00'
),
(
'2ed3127a-f95b-4ed7-8072-e56b10dad864',
24, 729, '2022-09-13 19:29:10', '0.00'
),
(
'2ee70c04-9785-463a-bfd0-779d45e9319f',
24, 724, '2022-02-15 17:47:04', '0.00'
),
(
'30731861-0c4e-437a-a3af-0a4f228194ed',
24, 30, '2018-11-01 13:23:35', '0.00'
),
(
'3419e88e-56a1-4272-91b8-fb1025d4518e',
24, 519, '2021-07-23 21:10:30', '0.00'
),
(
'34888d18-95c7-44f7-a864-5f80abbacb72',
24, 732, '2022-03-16 18:39:30', '0.00'
),
(
'3cec5b90-6fe5-409d-ad77-8fe705de9c24',
24, 162, '2019-09-22 10:41:11', '0.00'
),
(
'3f6b8dc4-1511-439f-ac11-663b251b07e0',
24, 521, '2021-09-27 13:14:04', '0.00'
),
(
'40e196a1-c48e-4196-9ebc-0b8829ec31de',
24, 48, '2019-01-11 19:46:17', '100.00'
),
(
'41d6c28f-3d53-47b6-a5db-65068162d8b5',
24, 717, '2022-03-14 19:21:50', '0.00'
),
(
'4286296a-1b7d-4b2d-a8ce-d97b0173c9f1',
24, 29, '2018-09-26 13:36:07', '87.50'
),
(
'4a0acf0a-eafd-4b09-981b-bbc35f9444d6',
24, 48, '2019-01-11 20:12:14', '100.00'
),
(
'508f9528-8570-49ea-971f-2783ecda87e3',
24, 724, '2022-02-15 16:43:39', '0.00'
),
(
'6130dc7b-b55d-42b9-8aa9-288c068fb5e8',
24, 724, '2022-02-15 16:47:39', '0.00'
),
(
'6264137e-822c-4b4f-abc1-8d099b1bb0a5',
24, 29, '2018-12-03 20:37:50', '0.00'
),
(
'636ebc8e-db11-4bc8-8605-732c88a3f209',
24, 724, '2022-02-15 19:09:37', '0.00'
),
(
'6820915d-a4c4-463e-aeaa-fe9f048a2f46',
24, 230, '2020-02-04 00:43:29', '62.50'
),
(
'6b8dcb63-626f-4f61-b74a-fdbe76c7af4c',
24, 717, '2022-03-14 19:16:52', '0.00'
),
(
'6ba57e47-d4ca-4f21-bb12-20058f72ff11',
24, 742, '2022-07-06 17:43:19', '0.00'
),
(
'7087eb2a-5477-4941-b8c0-6fc18311a672',
24, 724, '2022-02-15 19:13:35', '0.00'
),
(
'7520fd2e-df77-41ff-9cb1-b3d8d0608c33',
24, 230, '2022-03-14 19:24:05', '37.50'
),
(
'7ef687a8-6678-428f-b615-11f779ae155f',
24, 724, '2022-02-15 16:44:18', '0.00'
),
(
'843f175e-9d5b-41e7-b32b-0448df691b73',
24, 211, '2021-08-12 20:59:20', '100.00'
),
(
'8ed865f4-64eb-4150-a0ff-b885a3ae5bf3',
24, 567, '2021-06-04 14:08:26', '80.00'
),
(
'947896a3-6f5c-48e8-9877-2a7c14e70ca0',
24, 724, '2022-02-15 20:51:11', '0.00'
),
(
'95c647af-ff0a-495e-bf1e-78f89222306f',
24, 717, '2022-03-14 19:17:26', '100.00'
),
(
'99e5777f-7f6c-493a-b635-763d5b175ac0',
24, 724, '2022-02-15 16:17:08', '0.00'
),
(
'9c8ba2db-b02d-4bff-b516-bb7aaeb726d8',
24, 724, '2022-02-15 19:14:39', '0.00'
),
(
'b3178efc-5823-42cf-aa07-9f5e8e2db002',
24, 732, '2022-03-16 18:26:30', '0.00'
),
(
'bda4cccb-e75e-4b7c-8fe2-057b2e0a95c6',
24, 724, '2022-02-15 16:06:53', '0.00'
),
(
'c234db3b-6023-4ee0-9b3f-b4745326e35f',
24, 211, '2020-10-07 01:59:29', '83.33'
),
(
'cbacc24d-ea84-4e60-b1f9-6198a7b79501',
24, 732, '2022-03-16 18:40:40', '100.00'
),
(
'cde6ef4a-ef3f-4fc6-8e6b-341877dd0158',
24, 48, '2019-01-11 19:43:09', '100.00'
),
(
'd0461d61-1a10-4d1d-bc64-fc30517c520e',
24, 229, '2020-02-04 00:42:53', '0.00'
),
(
'd225531c-dfd4-4aec-9586-36319cf4b31d',
24, 724, '2022-02-15 17:50:40', '0.00'
),
(
'd95944c8-f62a-4faf-a45e-39fe31307a26',
24, 724, '2022-02-15 17:48:40', '0.00'
),
(
'e1a94c22-858a-40ba-84e4-185b2efff0e4',
24, 732, '2022-03-16 18:27:12', '0.00'
),
(
'e2eee6cd-e3f2-4bf8-a50c-5806389a5ffa',
24, 32, '2019-01-28 18:19:28', '100.00'
),
(
'e3785d8b-23a8-4e14-97ca-dd2b443911b7',
24, 32, '2019-01-28 18:19:10', '0.00'
),
(
'eee4c630-8ba0-4c12-be4b-26fdf3eb4d39',
24, 230, '2022-03-14 19:28:34', '87.50'
),
(
'efc49a51-9afa-4bef-98c3-50921591f77e',
24, 732, '2022-03-16 18:30:47', '100.00'
),
(
'f222aafd-4184-4e17-9cf8-75cf70810fe9',
24, 48, '2019-03-01 19:31:43', '0.00'
),
(
'f7396e03-695a-4dcd-a630-629463bd8254',
24, 230, '2022-03-14 19:26:04', '37.50'
),
(
'f801ae4d-5c21-42cd-b8fb-f2e262b3aa14',
24, 724, '2022-02-15 16:39:54', '0.00'
),
(
'fd4acb63-17f3-4f02-803c-2de5e4907fbe',
24, 169, '2021-01-04 19:42:55', '100.00'
)
The goal is to select a singular full row that contains the max grade. In other words, get the "play" that netted you your best grade for any step.
This query returns the right data set, but is not optimal; in a very large data set, it is taking upward of 3k seconds to net a result. It can be seen as a fiddle here as well: http://sqlfiddle.com/#!9/39fe80/2
SELECT
*
FROM
plays p
WHERE
p.user_id = 24
AND p.time_started BETWEEN '2020-08-01'
AND '2022-09-27'
AND (
p.grade IN (
SELECT
MAX(p2.grade)
FROM
plays p2
WHERE
p.user_id = p2.user_id
AND p.step_id = p2.step_id
AND (
p2.time_started BETWEEN '2020-08-01'
AND '2022-09-27'
)
)
)
GROUP BY
p.user_id,
p.step_id
ORDER BY
p.time_started ASC
I've been trying to rewrite it as an inner join, http://sqlfiddle.com/#!9/39fe80/4, but am having trouble netting the matched result set.
SELECT
*
FROM
plays p
INNER JOIN (
SELECT
p2.id,
MAX(p2.grade) grade
FROM
plays p2
WHERE
p2.time_started BETWEEN '2020-08-01'
AND '2022-09-27'
GROUP BY
p2.user_id,
p2.step_id
) X ON X.id = p.id
ORDER BY
p.time_started
Any guidance you can offer would be appreciated!