6

I'm trying to convert the following Oracle query into Postgres

select
        this_.GLOBAL_TRANSACTION_ID as y0_,
        this_.BUSINESS_IDENTIFIER as y1_,
        this_.ENVIRONMENT as y2_,
        count(*) as y3_,
        this_.HOST_NAME as y4_,
        listagg(process,
        ', ') within
    group (order by
        date_time) as process,
        min(this_.DATE_TIME) as y6_,
        max(this_.DATE_TIME) as y7_,
        max(status)keep(dense_rank last
    order by
        date_time,
        decode(status,
        'COMPLETED',
        'd',
        'FAILED',
        'c',
        'TERMINATED',
        'b',
        'STARTED',
        'a',
        'z')) as status
    from
        ACTIVITY_MONITOR_TRANSACTION this_
    where
        this_.DATE_TIME between ? and ?
        and 1=1
    group by
        this_.GLOBAL_TRANSACTION_ID,
        this_.BUSINESS_IDENTIFIER,
        this_.ENVIRONMENT,
        this_.HOST_NAME,
        global_transaction_id,
        business_identifier,
        global_transaction_id,
        business_identifier
    order by
        y7_ asc

the problem is I don't know how to convert this block:

max(status)keep(dense_rank last
    order by
        date_time,
        decode(status,
        'COMPLETED',
        'd',
        'FAILED',
        'c',
        'TERMINATED',
        'b',
        'STARTED',
        'a',
        'z')) as status

The aim of this block is to get the latest status, and in case of exact same time (it is possible!) assign the status following the order above.

This is an example of data:
      ID        DATA_TIME          GLOBAL_TRANSACTION_ID   STATUS
===================================================================
 54938456;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"STARTED"
 54938505;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"COMPLETED"
 54938507;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"FAILED"
 54938507;"2015-04-20 09:38:25";"8d276718-eca7-4fd0-a266 ;"FAILED"

The status should be "COMPLETED" so my query should return, among other rows, the following:

 GLOBAL_TRANSACTION_ID    COUNT    (...)       STATUS
=====================================================
 8d276718-eca7-4fd0-a266    4      (...)     COMPLETED

I have tried splitting the query into 2:

select
    this_.GLOBAL_TRANSACTION_ID as y0_,
    this_.BUSINESS_IDENTIFIER as y1_,
    this_.ENVIRONMENT as y2_,
    count(*) as y3_,
    this_.HOST_NAME as y4_,
    array_to_string(array_agg(distinct process),
    ',') as process,
    min(this_.DATE_TIME) as y6_,
    max(this_.DATE_TIME) as y7_,
    max(this_.STATUS) as y8_
from
    ACTIVITY_MONITOR_TRANSACTION this_
where
    this_.DATE_TIME between ? and ?
group by
    this_.GLOBAL_TRANSACTION_ID,
    this_.BUSINESS_IDENTIFIER,
    this_.ENVIRONMENT,
    this_.HOST_NAME,
    global_transaction_id,
    business_identifier
order by
    y7_ desc limit ?

and then

select
    status
from
    activity_monitor_transaction
where
    GLOBAL_TRANSACTION_ID=?
order by
    date_time DESC,
    CASE status
        WHEN 'COMPLETED'THEN 'd'
        WHEN 'FAILED' THEN 'c'
        WHEN 'TERMINATED' THEN 'b'
        WHEN 'STARTED' THEN 'a'
        ELSE 'z'
    END DESC LIMIT 1

But this is causing me performance issues as I have to execute the second query once per row.

This is the table script for postgres:

CREATE TABLE activity_monitor_transaction
(
  id numeric(11,0) NOT NULL,
  date_time timestamp(6) without time zone NOT NULL,
  global_transaction_id character varying(40) NOT NULL,
  repost_flag character(1) NOT NULL DEFAULT 'N'::bpchar,
  environment character varying(20),
  transaction_mode character varying(20),
  status character varying(20),
  step character varying(80),
  event character varying(20),
  event_code character varying(20),
  event_subcode character varying(20),
  summary character varying(200),
  business_identifier character varying(80),
  alternate_business_identifier character varying(80),
  domain character varying(20),
  process character varying(80),
  service_name character varying(80),
  service_version character varying(20),
  detail text,
  app_name character varying(80),
  app_user character varying(20),
  host_name character varying(80),
  thread_name character varying(200),
  CONSTRAINT activity_monitor_transact_pk PRIMARY KEY (id)
  USING INDEX TABLESPACE actmon_data
)

and some data:

insert into ACTIVITY_MONITOR_TRANSACTION values 
(54938456,'2015-04-20 09:39:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','STARTED','servicereq.p2p.rso.blaze.dedup.in.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');

insert into ACTIVITY_MONITOR_TRANSACTION values 
(54938505,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','COMPLETED','servicereq.p2p.rso.blaze.service.out.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');

insert into ACTIVITY_MONITOR_TRANSACTION values 
(54938507,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','FAILED','inputChannel','PROCESS','','','','3100729','','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');

Is there any way to mimic the keep dense_rank block into postgres in order to have just one query?

jvaello
  • 63
  • 1
  • 5
  • I believe folks who would want to help you, might need your table definition and the data, i.e. the table creation scripts and the insert statements. I have upvoted your question only because you have pointed out the portion where you need help. Now, please post the required details as I have suggested. – Lalit Kumar B Apr 20 '15 at 14:27
  • Added, hope that helps! – jvaello Apr 20 '15 at 14:52
  • No. Sorry, but, that won't help. Just think, If I would give you the same, how would you create a table out of it? You need to reverse engineer, write your own create and insert statements. Why not provide the create and insert statements. I have tried my best to help you about how to ask a question, I hope you get your desired solution. – Lalit Kumar B Apr 20 '15 at 14:56
  • 1
    Have you tried to use something like `first_value(status) OVER (partition by global_transaction_id order by date_time, CASE ... END)` ? – Ihor Romanchenko Apr 20 '15 at 15:49
  • Thanks Igor. It does not work because I would need to group by date_time. I would like to have the rows grouped by the other columns, so that rows with different date_times will be grouped, and the "count" will show the number of rows grouped. See the example above (I have edited) – jvaello Apr 21 '15 at 07:47

2 Answers2

4

You can use PostgreSQL WINDOW FUNCTIONS

-- we only added infos to the activity_monitor_transaction
-- we are free to group by date_time or status
SELECT
  first_value(status) OVER w AS global_transaction_status,
  count(*) OVER w AS global_transaction_count,
  activity_monitor_transaction.*
FROM
  activity_monitor_transaction
WINDOW w AS (
  PARTITION BY global_transaction_id
  ORDER BY date_time DESC, id DESC
  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
Clément Prévost
  • 8,000
  • 2
  • 36
  • 51
  • 2
    although in this case you will also need to have some way to de -dup because the window is not the same as `group by` - it will return one row per record in the activity table rather than a single row – wrschneider Jul 05 '18 at 15:22
1

Sample code

CREATE TABLE prices
( 
 ticker VARCHAR2(3),
 pdate DATE,
 price FLOAT
);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 55.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 56.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.22);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-27', 58.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.13);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.20);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.12);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-25', 67.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-27', 68.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-22', 88.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 88.33);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 89.33);

Oracle code

SELECT 
  ticker,
  min(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as minfirstprice,
  max(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as maxfirstprice,
  min(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as minlastprice,
  max(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as maxlastprice,
  sum(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as sumfirstprice,
  sum(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as sumlastprice
FROM prices
GROUP BY ticker
ORDER BY ticker;

Postgresql code

with _prepare as (
    select ticker, pdate,
           min(price) over w as minprice,
           max(price) over w as maxprice,
           sum(price) over w as sumprice
    FROM prices
    WINDOW w AS (partition by ticker,pdate)    
)
SELECT distinct * from 
(
    select ticker,
           first_value(minprice) over w1 as minfirstprice,
           first_value(maxprice) over w1 as maxfirstprice,
           first_value(minprice) over w2 as minlastprice,
           first_value(maxprice) over w2 as maxlastprice,
           first_value(sumprice) over w1 as sumfirstprice,
           first_value(sumprice) over w2 as sumlastprice
    from _prepare
    WINDOW w1 AS (partition by ticker ORDER BY pdate), 
           w2 AS (partition by ticker ORDER BY pdate desc)
) t2
ORDER BY ticker;