0

I use Hive 1.2.1, and the default block size is set to 128 MB. The files in HDFS are stored in ORC format.

There are too many small files, so I set the hive.merge.* options (shown below), but they don't seem to take effect. I would appreciate it if you could tell me the reason.

1. Setting Properties

-- Run queries on the MapReduce engine.
set hive.execution.engine=mr;

-- Dynamic partitioning: enabled, non-strict mode, at most 1000 partitions
-- overall and 1000 per node.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=1000;

-- Small-file merging for both map-only and map-reduce jobs.
-- 268435456 = 256 * 1024 * 1024 and 536870912 = 512 * 1024 * 1024
-- (the Hive configuration reference documents both values as byte counts).
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=268435456;
set hive.merge.size.per.task=536870912;

-- Compression of final and intermediate job output (Gzip codec, BLOCK type).
set hive.exec.compress.output=true;
set hive.exec.compress.intermediate=true;
set hive.intermediate.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set hive.intermediate.compression.type=BLOCK;

2. Table Create DDL

-- External ORC table (ZLIB-compressed) partitioned by `date`.
-- The partition column is declared only in PARTITIONED BY. Hive reports
-- "Column repeated in partitioning columns" when the same name also appears
-- in the regular column list, so `date` is not repeated there.
CREATE EXTERNAL TABLE `temp.test_dst`(
  `colA` string COMMENT ' ',
  `colB` string COMMENT ' ',
  `colC` string COMMENT ' ')
PARTITIONED BY (
  `date` string COMMENT ' ')
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
TBLPROPERTIES (
  'orc.compress'='ZLIB');

3. Insert Data

-- Dynamic-partition overwrite of temp.test_dst with one day of rows from
-- temp.src_table.
-- `date` is back-quoted because DATE is a reserved keyword in Hive 1.2+
-- (when hive.support.sql11.reserved.keywords is left at its default).
-- The dynamic partition column is listed last in the SELECT, as Hive requires.
-- CLUSTER BY is shorthand for DISTRIBUTE BY plus SORT BY on the same columns.
INSERT OVERWRITE TABLE temp.test_dst PARTITION (`date`)
SELECT colA, colB, colC, `date`
FROM temp.src_table
WHERE `date` = '2021-11-01'
CLUSTER BY colA, colB;

4. Result

319.0 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000014_0
254.2 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000034_0
253.0 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000053_0
252.9 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000054_0
252.9 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000055_0
252.9 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000056_0
170.1 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000088_0
157.7 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000093_0
148.1 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000099_0
130.7 M  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000104_0
85.9 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000126_0
83.2 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000128_0
49.0 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000153_0
48.3 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000155_0
34.6 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000166_0
26.5 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000177_0
26.0 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000178_0
22.8 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000181_0
18.5 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000185_0
17.9 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000187_0
13.9 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000190_0
10.5 M   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000191_0
9.3 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000194_0
5.5 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000200_0
5.3 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000202_0
4.9 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000203_0
4.1 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000205_0
3.8 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000206_0
2.3 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000210_0
1.9 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000211_0
1.9 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000212_0
1.8 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000213_0
1.5 M    hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000214_0
605.2 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000216_0
568.5 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000217_0
461.5 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000218_0
459.6 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000219_0
237.2 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000220_0
200.0 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000221_0
162.2 K  hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000222_0
92.3 K   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000223_0
74.1 K   hdfs://user/hive/warehouse/temp.db/test_dst/date=2021-11-01/000224_0
alwaysnoob
  • 33
  • 4

0 Answers0