1

I have a source table employee_source and a target partitioned table employee_partitioned. The employee_source is replenished every day by new data and I want to transport the delta to target employee_partitioned every day with cleaning by id at the same date and updating record if the identifiers will be equal in records from different dates. Could you help me to write the correct insert to employee_partitioned

-- init
drop table if exists employee_source;
create table if not exists employee_source
(
    employee_id  string,
    name         string,
    process_date string
);

drop table if exists employee_partitioned;
create table employee_partitioned
(
    employee_id string,
    name        string
) partitioned by (process_date string);

First inserting

insert into table employee_source
values
       ('100', 'Michael', '2020-03-12'),
       ('100', 'Michael', '2020-03-12'),
       ('101', 'Will', '2020-03-12');

First cleaning and transport

insert into table employee_partitioned partition (process_date)
select employee_id, name, process_date
from employee_source eh
where process_date = '2020-03-12';

Second inserting

insert into table employee_source
values ('101', 'Arnold', '2020-03-13'),
       ('102', 'Steven', '2020-03-13'),
       ('103', 'Lucy', '2020-03-13');

Second cleaning and transport

insert into table employee_partitioned partition (process_date)
select employee_id, name, process_date
from employee_source eh
where process_date = '2020-03-13';


select *
from employee_partitioned;

The expected result is

-- 100     Michael 2020-03-12
-- 101     Arnold  2020-03-13
-- 102     Steven  2020-03-13
-- 103     Lucy    2020-03-13
Michael
  • 27
  • 1
  • 3

0 Answers0