I am using Hive 1.2.1. The default HDFS block size is set to 128 MB, and the files in HDFS are stored in ORC format.
There are too many small files, so I set the hive.merge.* options, but they do not seem to take effect. I would appreciate it if you could tell me why.
1. Setting Properties
-- Run the job on the classic MapReduce engine.
SET hive.execution.engine=mr;

-- Dynamic partitioning: enabled, non-strict mode, with a cap of 1000
-- partitions overall and 1000 per node.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=1000;
SET hive.exec.max.dynamic.partitions.pernode=1000;

-- Small-file merge: requested for both map-only and map-reduce jobs.
-- 268435456 bytes = 256 MiB (average-size threshold),
-- 536870912 bytes = 512 MiB (size per merge task).
SET hive.merge.mapfiles=true;
SET hive.merge.mapredfiles=true;
SET hive.merge.smallfiles.avgsize=268435456;
SET hive.merge.size.per.task=536870912;

-- Compression: final output and intermediate data are both compressed;
-- intermediate data uses Gzip with BLOCK compression type.
SET hive.exec.compress.output=true;
SET hive.exec.compress.intermediate=true;
SET hive.intermediate.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
SET hive.intermediate.compression.type=BLOCK;
2. Table Create DDL
-- External ORC table partitioned by `date`; ORC data is ZLIB-compressed.
-- The partition column is declared only in PARTITIONED BY: Hive rejects a
-- column that appears in both the regular column list and the partition spec.
-- Database and table names are back-quoted separately (NOTE(review): a single
-- back-quoted `db.table` identifier is rejected by some Hive versions - confirm
-- against the target cluster).
CREATE EXTERNAL TABLE `temp`.`test_dst`(
  `colA` string COMMENT ' ',
  `colB` string COMMENT ' ',
  `colC` string COMMENT ' ')
PARTITIONED BY (
  `date` string COMMENT ' ')
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
TBLPROPERTIES (
  'orc.compress'='ZLIB');
3. Insert Data
-- Overwrites the target partition(s) of temp.test_dst with rows from
-- temp.src_table for 2021-11-01. The partition value is taken dynamically
-- from the last column of the SELECT list.
-- `date` is back-quoted, matching the table DDL, because DATE is a reserved
-- keyword as of Hive 1.2.0 and fails to parse as a bare identifier.
-- CLUSTER BY distributes and sorts rows by (colA, colB) across reducers.
INSERT OVERWRITE TABLE temp.test_dst PARTITION (`date`)
SELECT
    colA,
    colB,
    colC,
    `date`
FROM temp.src_table
WHERE `date` = '2021-11-01'
CLUSTER BY colA, colB;
4. Result
319.0 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000014_0
254.2 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000034_0
253.0 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000053_0
252.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000054_0
252.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000055_0
252.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000056_0
170.1 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000088_0
157.7 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000093_0
148.1 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000099_0
130.7 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000104_0
85.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000126_0
83.2 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000128_0
49.0 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000153_0
48.3 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000155_0
34.6 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000166_0
26.5 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000177_0
26.0 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000178_0
22.8 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000181_0
18.5 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000185_0
17.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000187_0
13.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000190_0
10.5 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000191_0
9.3 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000194_0
5.5 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000200_0
5.3 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000202_0
4.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000203_0
4.1 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000205_0
3.8 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000206_0
2.3 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000210_0
1.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000211_0
1.9 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000212_0
1.8 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000213_0
1.5 M hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000214_0
605.2 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000216_0
568.5 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000217_0
461.5 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000218_0
459.6 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000219_0
237.2 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000220_0
200.0 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000221_0
162.2 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000222_0
92.3 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000223_0
74.1 K hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000224_0