0

DB-Fiddle

CREATE TABLE sales (
    id SERIAL PRIMARY KEY,
    event_date DATE,
    customer VARCHAR,
    orderID VARCHAR,
    sales_volume DECIMAL
);

INSERT INTO sales
(event_date, customer, orderID, sales_volume)
VALUES 
('2020-01-08', 'Customer_A', 'Order_001', '130'),
('2020-01-12', 'Customer_A', 'Order_002', '120'),
('2020-01-18', 'Customer_A', 'Order_003', '115'),
('2020-01-22', 'Customer_C', 'Order_001', '300'),
('2020-01-23', 'Customer_C', 'Order_002', '500'),
('2020-01-24', 'Customer_D', 'Order_001', '400'),
('2020-01-26', 'Customer_E', 'Order_001', '200'),

('2020-04-08', 'Customer_B', 'Order_001', '325'),
('2020-04-12', 'Customer_B', 'Order_002', '875'),
('2020-04-15', 'Customer_B', 'Order_003', '910'),
('2020-04-20', 'Customer_B', 'Order_004', '723'),
('2020-04-30', 'Customer_C', 'Order_003', '665'),

('2020-06-01', 'Customer_B', 'Order_005', '982'),
('2020-06-15', 'Customer_B', 'Order_006', '100'),
('2020-06-19', 'Customer_C', 'Order_004', '250'),
('2020-06-20', 'Customer_C', 'Order_005', '322'),
('2020-06-30', 'Customer_E', 'Order_002', '924'),
('2020-06-25', 'Customer_A', 'Order_004', '445');

Expected Result:

customer     |   orderid     |      event_date    |     sales_volume
-------------|---------------|--------------------|----------------------
Customer_C   |   Order_001   |      2020-01-22    |       300      
Customer_C   |   Order_002   |      2020-01-23    |       500
Customer_C   |   Order_003   |      2020-04-30    |       665
Customer_C   |   Order_004   |      2020-06-19    |       250
Customer_C   |   Order_005   |      2020-06-20    |       322
-------------|---------------|--------------------|------------------------
Customer_E   |   Order_001   |      2020-01-26    |       200
Customer_E   |   Order_002   |      2020-06-30    |       924
-------------|---------------|--------------------|------------------------
Customer_A   |   Order_001   |      2020-01-08    |       130
Customer_A   |   Order_002   |      2020-01-12    |       120
Customer_A   |   Order_003   |      2020-01-18    |       115
Customer_A   |   Order_004   |      2020-06-25    |       445

I want to randomly extract 60% of all customers from the table with their corresponding data.

In the example above 60% means 3 customers from the total of 5 customers.
Those 3 customers should be selected randomly.

Do you have any idea if this is possible?
If yes, how do I need to modify the below query?

SELECT
s.customer,
s.orderID,
s.event_date,
SUM(s.sales_volume) AS sales_volume
FROM sales s
GROUP BY 1,2,3
ORDER BY 1,2,3;
Michi
  • 4,663
  • 6
  • 33
  • 83
  • https://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql , https://stackoverflow.com/questions/54149394/selecting-a-random-sample-from-a-view-in-postgresql – Mitch Wheat Apr 12 '21 at 14:54
  • Do you have a table with all the customers? –  Apr 12 '21 at 15:03

1 Answers1

0

Hmmm . . . this might be simplest by first selecting the customers and then the sales:

select s.*
from (select customer,
             ntile(5) over (order by random()) as tile
      from sales
      group by customer
     ) c join
     sales s
     on c.customer = s.customer
where tile <= 3;
Gordon Linoff
  • 1,242,037
  • 58
  • 646
  • 786