I need to break a Hive query grouped by an ID column over the quarters of calendar year 2018. Below is how I am currently going about it I would like another option to achieve the same result with fewer queries.
--Query 1 quarter 1 2018 plus three identical queries for Q2,Q3,Q4
Create TABLE Q12018 stored as ORC as
select
ID,
count(1) as cnt,
sum(revenue) as revenue,
sum( (CASE
WHEN condition1
THEN 1
ELSE 0 END)) as metric1,
sum( (CASE
WHEN condition2
THEN revenue
ELSE 0 END)) as metric2,
sum( (CASE
WHEN condition3
THEN 1
ELSE 0 END)) as metric3,
sum( (CASE
WHEN codition4
THEN revenue
ELSE 0 END)) as metric4
from mainTable
where month between 201801 and 201803
group by
ID;
--Query 2
Create TABLE combined2018 stored as ORC as
select * from Q12018
union all
select * from Q22018
union all
select * from Q32018
union all
select * from Q42018 ;
--Query 3
Create TABLE Agg2018 stored as ORC as
Select
ID,
Sum(cnt),
Sum(revenue),
Sum(metric1),
Sum(metric2),
sum(metric3),
sum(metric4)
from combined2018
group by ID