I am trying figure out to create the best data model in general for the long term and in the short term support analysis of understanding what drop offs in activity between events (ie 30 users made it to enter their username, 29 entered password and 28 clicked login).
I you can copy my code to this sql repl https://replit.com/languages/sqlite.
I have the query below to create the table with mock data and the last query is my attempt at capturing how many events occurred at each 'step' I define, but I feel like my query is just off and would love a second opinion on where to improve.
CREATE TABLE events (
event_name ,
user_id ,
original_timestamp
) ;
insert into events(event_name, user_id,original_timestamp )
values
('ACTION_b','084f907b-4482-4195-b0f09-bb4cdda7fdd85da', '9/2/2022 19:15:00'),
('ACTION_a','5129d71e-c8dc-4d68-9ff3a-b974ddad76ad658', '9/2/2022 19:13:40'),
('ACTION_c','6722c2bb-edad-4c69-a7fff-117addaa5c6fafd', '9/2/2022 19:12:08'),
('ACTION_a','6722c2bb-edad-4c69-a7fff-117addaa5c6fafd', '9/2/2022 19:09:38'),
('ACTION_d','3eaf5f91-67ee-4566-84f90-b74cdda6db3330f', '9/2/2022 19:07:01'),
('ACTION_d','9b1f6b84-149f-42b5-9ff08-db63dda461baeed', '9/2/2022 19:06:57'),
('ACTION_f','3eaf5f91-67ee-4566-84f90-b74cdda6db3330f', '9/2/2022 19:06:18'),
('ACTION_e','e3ca0103-29b5-40d0-89f62-2f5ddda45b75093', '9/2/2022 19:01:05'),
('ACTION_a','6722c2bb-edad-4c69-a7fff-117addaa5c6fafd', '9/2/2022 18:53:44'),
('ACTION_a','e2bd2b3b-29f8-4a69-aefd7-b84adda6cdedf4e', '9/2/2022 18:51:47'),
('ACTION_e','3c18859e-da54-4226-b5f29-57dbddac4d7f326', '9/2/2022 18:51:15'),
('ACTION_a','0f5a0630-ad11-4a92-89fe6-fcfedda0962ea14', '9/2/2022 18:49:30'),
('ACTION_d','6c17787c-3c3f-4b78-b2f0d-3191dda5ead3cf4', '9/2/2022 18:48:28'),
('ACTION_a','3c18859e-da54-4226-b5f29-57dbddac4d7f326', '9/2/2022 18:47:01'),
('ACTION_a','01bdfc88-c025-4415-89fb0-13cadda7289a0e7', '9/2/2022 18:46:15'),
('ACTION_d','01bdfc88-c025-4415-89fb0-13cadda7289a0e7', '9/2/2022 18:45:58'),
('ACTION_f','01bdfc88-c025-4415-89fb0-13cadda7289a0e7', '9/2/2022 18:45:43'),
('ACTION_a','01bdfc88-c025-4415-89fb0-13cadda7289a0e7', '9/2/2022 18:45:22'),
('ACTION_d','01bdfc88-c025-4415-89fb0-13cadda7289a0e7', '9/2/2022 18:45:10'),
('ACTION_a','9712d934-cbb0-46a4-8cf03-df02dda45922d1e', '9/2/2022 18:44:53'),
('ACTION_a','4b4ffe18-ba93-49ed-a4fcd-73d4dda04518195', '9/2/2022 18:42:21'),
('ACTION_a','4b4ffe18-ba93-49ed-a4fcd-73d4dda04518195', '9/2/2022 18:42:12'),
('ACTION_d','0f5a0630-ad11-4a92-89fe6-fcfedda0962ea14', '9/2/2022 18:37:19'),
('ACTION_a','8a5634bd-b6b1-4d58-a7fe9-f93fddafa02aff6', '9/2/2022 18:33:30'),
('ACTION_a','f42aec00-e4b1-4376-bbf44-5e97dda2d2633df', '9/2/2022 18:32:41'),
('ACTION_c','d034d8e0-08da-492f-adf99-af78ddaf8499668', '9/2/2022 18:26:17'),
('ACTION_c','38b9888e-bbb4-4332-8cf6e-cb62ddaf6d93ca1', '9/2/2022 18:24:25'),
('ACTION_a','a3a04e39-cb72-433c-befc8-fb1ddda9591509e', '9/2/2022 18:23:54'),
('ACTION_a','a3a04e39-cb72-433c-befc8-fb1ddda9591509e', '9/2/2022 18:23:35'),
('ACTION_c','4b4ffe18-ba93-49ed-a4fcd-73d4dda04518195', '9/2/2022 18:22:36'),
('ACTION_a','8ac00e6a-f463-45fc-9efa7-5823dda06c29659', '9/2/2022 18:16:45'),
('ACTION_a','a287e339-cd04-48e4-89fc3-5cd0dda83b16683', '9/2/2022 18:16:42'),
('ACTION_a','420f5e07-2f3f-45c3-b4f10-ab9cdda8d9f8bb1', '9/2/2022 18:14:53'),
('ACTION_c','420f5e07-2f3f-45c3-b4f10-ab9cdda8d9f8bb1', '9/2/2022 18:14:38'),
('ACTION_g','8c44d559-82c6-408d-87f53-28bdddab01d999a', '9/2/2022 18:12:56'),
('ACTION_f','8c44d559-82c6-408d-87f53-28bdddab01d999a', '9/2/2022 18:12:38'),
('ACTION_d','83ba2d64-5158-4508-85fa2-e279dda305bf55b', '9/2/2022 18:10:35'),
('ACTION_a','83ba2d64-5158-4508-85fa2-e279dda305bf55b', '9/2/2022 18:09:56'),
('ACTION_a','069f8bdd-7968-4d69-9afee-a7f7ddafbb16959', '9/2/2022 18:08:56');
with create_flow as (select user_id
from events e
where e.event_name = 'ACTION_a'
),
choose_ as (
select
e.user_id
from create_flow v
inner join events e on e.user_id = v.user_id
where e.event_name in ( 'ACTION_b',
'ACTION_c',
'ACTION_d' )
),
site_details as (
select
distinct e.user_id
from choose_ s --
inner join events e on e.user_id = s.user_id
where e.event_name in
(
'ACTION_e' --
)
),
-- site created now
site_advance as (
select
distinct e.user_id
from site_details a --
inner join events e on e.user_id = a.user_id
where e.event_name = 'ACTION_f'
),
visit_site as (
select
distinct e.user_id
from site_advance a --
inner join events e on e.user_id = a.user_id
where e.event_name = 'ACTION_g'
)
select 'CREATE' as step, COUNT(*) c from create_flow
union all--
select 'THIS' as step, COUNT(*) c from choose_
union all
select 'FLOW' as step, COUNT(*) c from site_details
union all
select 'OF' as step, COUNT(*) c from site_advance
union all
select 'EVENTS' as step, COUNT(*) c from visit_site
order by c desc;