I am combining several tables with UNION ALL and then computing the sum of each column, but the query seems to run forever. My cluster has 96 GB of memory. What should I do to improve performance? The following is my Hive query.
-- Computes the grand total of col_1 .. col_10 over the rows of table1 .. table9.
--
-- Changes relative to the earlier form of this query:
--   * the CTE is introduced with the required `with` keyword;
--   * every branch lists only the ten columns that are summed instead of
--     `select *`, so no other column has to be read or carried through the
--     union, and the branches are matched by column name rather than by each
--     table's physical column order;
--   * the pass-through `select * from (...)` wrappers are removed because
--     they added nesting without changing the result.
--
-- NOTE(review): assumes every source table exposes col_1 .. col_10 with
-- compatible types -- confirm against the table definitions.
with total as
(
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table1
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table2
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table3
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table4
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table5
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table6
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table7
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table8
    union all
    select col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10
    from table9
)
-- A single output row; each sum is NULL when its column has no non-NULL input.
-- The aliases give the result columns stable names instead of generated ones.
select
    sum(col_1)  as sum_col_1,
    sum(col_2)  as sum_col_2,
    sum(col_3)  as sum_col_3,
    sum(col_4)  as sum_col_4,
    sum(col_5)  as sum_col_5,
    sum(col_6)  as sum_col_6,
    sum(col_7)  as sum_col_7,
    sum(col_8)  as sum_col_8,
    sum(col_9)  as sum_col_9,
    sum(col_10) as sum_col_10
from total;