-1

Select distinct users group by time range

how do i do the link above in google big query's version of SQL?

Update with details:

I have a table with the following info

 |day| user_id  

I want to calculate the number of distinct user_id's for a date:

  1. For that date
  2. For that week up to that date (Week to date)
  3. For the month up to that date (Month to date)

Example of input table:

 | day        | user_id    
 | 2013-01-01 | 1          
 | 2013-01-03 | 3          
 | 2013-01-06 | 4          
 | 2013-01-07 | 4          

Expected output:

 | day        | time_series | cnt        |                 
 | 2013-01-01 | D           | 1          |                 
 | 2013-01-01 | W           | 1          |                 
 | 2013-01-01 | M           | 1          |                 
 | 2013-01-03 | D           | 1          |                 
 | 2013-01-03 | W           | 2          |                 
 | 2013-01-03 | M           | 2          |                 
 | 2013-01-06 | D           | 1          |                 
 | 2013-01-06 | W           | 1          |                 
 | 2013-01-06 | M           | 3          |                 
 | 2013-01-07 | D           | 1          |                 
 | 2013-01-07 | W           | 1          |                 
 | 2013-01-07 | M           | 3          |

P.S. Similar question was ask for postgresql - but I need the version for BigQuery

Mikhail Berlyant
  • 165,386
  • 8
  • 154
  • 230
Alec
  • 21
  • 1

1 Answers1

0

Below is for BigQuery Standard SQL

Option #1

#standardSQL
WITH `project.dataset.table` AS (
  SELECT DATE '2013-01-01' day, 1 user_id UNION ALL
  SELECT '2013-01-03', 3 UNION ALL
  SELECT '2013-01-06', 4 UNION ALL
  SELECT '2013-01-07', 4 
)
SELECT day, 'D' series, COUNT(DISTINCT user_id) users 
FROM `project.dataset.table` GROUP BY day 
UNION ALL
SELECT DISTINCT day, 'W', (SELECT COUNT(DISTINCT id) FROM UNNEST(users) id) 
FROM (
  SELECT day,  ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, WEEK) ORDER BY day) users
  FROM `project.dataset.table`
)
UNION ALL
SELECT DISTINCT day, 'M', (SELECT COUNT(DISTINCT id) FROM UNNEST(users) id)
FROM (
  SELECT day,  ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, MONTH) ORDER BY day) users
  FROM `project.dataset.table`
)
ORDER BY day, CASE series WHEN 'D' THEN 1 WHEN 'W' THEN 2 ELSE 3 END

with result

Row day         series  users    
1   2013-01-01  D       1    
2   2013-01-01  W       1    
3   2013-01-01  M       1    
4   2013-01-03  D       1    
5   2013-01-03  W       2    
6   2013-01-03  M       2    
7   2013-01-06  D       1    
8   2013-01-06  W       1    
9   2013-01-06  M       3    
10  2013-01-07  D       1    
11  2013-01-07  W       1    
12  2013-01-07  M       3    

Option #2 - based on above version but combining three query into just one

#standardSQL
SELECT DISTINCT day, d_users,
  (SELECT COUNT(DISTINCT id) FROM UNNEST(w_users) id) w_users,
  (SELECT COUNT(DISTINCT id) FROM UNNEST(m_users) id) m_users
FROM (
  SELECT day,  
    COUNT(DISTINCT user_id) OVER(PARTITION BY day) d_users,
    ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, WEEK) ORDER BY day) w_users,
    ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, MONTH) ORDER BY day) m_users
  FROM `project.dataset.table`
)
ORDER BY day  

If to apply to same data - result is

Row day         d_users w_users m_users  
1   2013-01-01  1       1       1    
2   2013-01-03  1       2       2    
3   2013-01-06  1       1       3    
4   2013-01-07  1       1       3      

Option #3 - if for some reason you need unpivot / flatten result of option #2

#standardSQL
SELECT day, series, users
FROM (
  SELECT DISTINCT day, d_users,
    (SELECT COUNT(DISTINCT id) FROM UNNEST(w_users) id) w_users,
    (SELECT COUNT(DISTINCT id) FROM UNNEST(m_users) id) m_users
  FROM (
    SELECT day,  
      COUNT(DISTINCT user_id) OVER(PARTITION BY day) d_users,
      ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, WEEK) ORDER BY day) w_users,
      ARRAY_AGG(user_id) OVER(PARTITION BY DATE_TRUNC(day, MONTH) ORDER BY day) m_users
    FROM `project.dataset.table`
  )
), UNNEST([STRUCT('D' AS series, d_users AS users), ('W', w_users), ('M', m_users)]) 
ORDER BY day   

wich gives same result as in option #1

Mikhail Berlyant
  • 165,386
  • 8
  • 154
  • 230