I am using the below query to execute on AWS Athena. It gives error 'Query exhausted resources at this scale factor. '. In the query stats tab, I see that the data scanned is approximately 36 GB.
WITH session_dataset AS (
SELECT user_id,
max(medium) as medium,
max(event_date) as event_date,
session_id
FROM view_session
where date(event_date) <= date_add('day', - 1, current_date)
and date(event_date) >= date_add('day', - 90, current_date)
and category not in ('Offline Sources')
GROUP BY user_id,
session_id
),
user_conversion AS (
select user_id,
session_id,
name,
event_date,
has_crm,
customer_retention_type
from view_session
where cohort_type = 'conversion'
and name is not null
and date(event_date) <= date_add('day', - 1, current_date)
and date(event_date) >= date_add('day', - 90, current_date)
),
dataset_yesterday AS (
SELECT uc.user_id,
uc.name,
max(uc.has_crm) as has_crm,
max(uc.customer_retention_type) as customer_retention_type,
count(sd.session_id) as view_count,
date_diff(
'day',
date(min(sd.event_date)),
date(max(uc.event_date))
) AS days_convert,
array_agg(
sd.medium
order by sd.event_date,
split(sd.session_id, '_') [ 1 ] asc
) as medium_list
FROM session_dataset sd,
user_conversion uc
where date(sd.event_date) <= date(uc.event_date)
and date(sd.event_date) >= date_add('day', - 1, current_date)
and uc.user_id = sd.user_id
and split(uc.session_id, '_') [ 1 ] >= split(sd.session_id, '_') [ 1 ]
GROUP BY uc.user_id,
uc.session_id,
uc.name
),
dataset_week AS (
SELECT uc.user_id,
uc.name,
max(uc.has_crm) as has_crm,
max(uc.customer_retention_type) as customer_retention_type,
count(sd.session_id) as view_count,
date_diff(
'day',
date(min(sd.event_date)),
date(max(uc.event_date))
) AS days_convert,
array_agg(
sd.medium
order by sd.event_date,
split(sd.session_id, '_') [ 1 ] asc
) as medium_list
FROM session_dataset sd,
user_conversion uc
where date(sd.event_date) <= date(uc.event_date)
and date(sd.event_date) >= date_add('day', - 7, current_date)
and uc.user_id = sd.user_id
and split(uc.session_id, '_') [ 1 ] >= split(sd.session_id, '_') [ 1 ]
GROUP BY uc.user_id,
uc.session_id,
uc.name
),
dataset_month AS (
SELECT uc.user_id,
uc.name,
max(uc.has_crm) as has_crm,
max(uc.customer_retention_type) as customer_retention_type,
count(sd.session_id) as view_count,
date_diff(
'day',
date(min(sd.event_date)),
date(max(uc.event_date))
) AS days_convert,
array_agg(
sd.medium
order by sd.event_date,
split(sd.session_id, '_') [ 1 ] asc
) as medium_list
FROM session_dataset sd,
user_conversion uc
where date(sd.event_date) <= date(uc.event_date)
and date(sd.event_date) >= date_add('day', - 30, current_date)
and uc.user_id = sd.user_id
and split(uc.session_id, '_') [ 1 ] >= split(sd.session_id, '_') [ 1 ]
GROUP BY uc.user_id,
uc.session_id,
uc.name
),
dataset_quarter AS (
SELECT uc.user_id,
uc.name,
max(uc.has_crm) as has_crm,
max(uc.customer_retention_type) as customer_retention_type,
count(sd.session_id) as view_count,
date_diff(
'day',
date(min(sd.event_date)),
date(max(uc.event_date))
) AS days_convert,
array_agg(
sd.medium
order by sd.event_date,
split(sd.session_id, '_') [ 1 ] asc
) as medium_list
FROM session_dataset sd,
user_conversion uc
where date(sd.event_date) <= date(uc.event_date)
and date(sd.event_date) >= date_add('day', - 90, current_date)
and uc.user_id = sd.user_id
and split(uc.session_id, '_') [ 1 ] >= split(sd.session_id, '_') [ 1 ]
GROUP BY uc.user_id,
uc.session_id,
uc.name
)
select 'yesterday' as window,
name,
sum(days_convert) as days_convert,
count(name) as total_conversion,
sum(view_count) as total_view,
count(
distinct IF(has_crm = '1', user_id, NULL)
) AS customer_count,
count(distinct IF(has_crm != '1' or has_crm is null, user_id, NULL)) AS anonymous_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'returning',
user_id,
NULL
)
) AS returning_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'new',
user_id,
NULL
)
) AS new_customer_count,
medium_list [ 1 ] as first_click,
medium_list [ cardinality(medium_list) ] as last_click,
medium_list
from dataset_yesterday
group by name,
medium_list
union all
select 'month' as window,
name,
sum(days_convert) as days_convert,
count(name) as total_conversion,
sum(view_count) as total_view,
count(
distinct IF(has_crm = '1', user_id, NULL)
) AS customer_count,
count(distinct IF(has_crm != '1' or has_crm is null, user_id, NULL)) AS anonymous_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'returning',
user_id,
NULL
)
) AS returning_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'new',
user_id,
NULL
)
) AS new_customer_count,
medium_list [ 1 ] as first_click,
medium_list [ cardinality(medium_list) ] as last_click,
medium_list
from dataset_month
group by name,
medium_list
union all
union all
select 'quarter' as window,
name,
sum(days_convert) as days_convert,
count(name) as total_conversion,
sum(view_count) as total_view,
count(
distinct IF(has_crm = '1', user_id, NULL)
) AS customer_count,
count(distinct IF(has_crm != '1' or has_crm is null, user_id, NULL)) AS anonymous_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'returning',
user_id,
NULL
)
) AS returning_customer_count,
count(
distinct IF(
lower(customer_retention_type) = 'new',
user_id,
NULL
)
) AS new_customer_count,
medium_list [ 1 ] as first_click,
medium_list [ cardinality(medium_list) ] as last_click,
from dataset_quarter
group by name,
medium_list
I found similar queries in Stack Overflow. In one post, they asked to remove the order by clause.
How can I do that in the above query?