-1

What I'm trying to do in BigQuery using data from Google Analytics:

  1. get the list of product impressions and clicks from Google Analytics, together with additional data (productListName, productListPosition, Test etc.) - aka "listTable"

  2. get the list of transactions, that are associated with those clicks - aka "transTable"

  3. populate the missing field values in "transTable" with values from "listTable"
    -- Test

    -- productListName

    -- productListPosition

    -- Sponsored

    -- searchResultType

  4. Match those missing values by a combination of date, fullVisitorID, visitID (I assume those are how you define sessionID) and productSKU.

  5. Given that there may be multiple matches, I'd take the last value (ordered by hitNumber)

  6. Combine "listTable" and "transTable"

I'm having a problem with steps 3 to 5, so I've set the required values as NULL for now in the "transTable".

Here is my code so far:

WITH
  listTable AS (
  SELECT
    *
  FROM (
    SELECT
      date,
      fullVisitorID,
      visitID,
      (
      SELECT
        value
      FROM
        UNNEST(hits.customdimensions)
      WHERE
        INDEX = 58
      GROUP BY
        value ) AS Test,
      hits.hitNumber,
      CASE
        WHEN product.isImpression = TRUE THEN "Impression"
        WHEN product.isClick = TRUE THEN "Click"
    END
      AS Action,
      productListName,
      product.productListPosition AS productListPosition,
      (
      SELECT
        value
      FROM
        UNNEST(product.customdimensions)
      WHERE
        INDEX = 68
      GROUP BY
        value ) AS Sponsored,
      (
      SELECT
        value
      FROM
        UNNEST(product.customdimensions)
      WHERE
        INDEX = 76
      GROUP BY
        value ) AS searchResultType,
      product.productSKU,
      product.productPrice / 1000000 AS Price
    FROM
      `ga_sessions_20230603`,
      UNNEST (hits) AS hits,
      UNNEST (hits.product) AS product
    WHERE
      productListName IN ( "searchpage",
        "categorypage",
        "dropdown_initial",
        "dropdown_results" )
      AND ( product.isImpression = TRUE
        OR product.isClick = TRUE ) )
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC,
    productListPosition ASC ),

  transTable AS (
  SELECT
    date,
    fullVisitorID,
    visitID,
    NULL AS Test,
    hits.hitNumber,
    CASE
      WHEN hits.eCommerceAction.action_type = "6" THEN "Purchase"
  END
    AS Action,
    NULL AS productListName,
    NULL AS productListPosition,
    NULL AS Sponsored,
    NULL AS searchResultType,
    product.productSKU,
    product.productPrice / 1000000 AS Price
  FROM
    `ga_sessions_20230603`,
    UNNEST (hits) AS hits,
    UNNEST (hits.product) AS product
  WHERE
    hits.eCommerceAction.action_type = "6"
    AND CONCAT(date, fullVisitorID, visitID) IN (
    SELECT
      DISTINCT CONCAT(date, fullVisitorID, visitID)
    FROM
      listTable
    WHERE
      Action = "Click" )
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC )

SELECT
  *
FROM
  listTable
UNION ALL
SELECT
  *
FROM
  transTable

I think I can use LAST_VALUE(), but can't figure out how to use it properly - it seems similar to the problem I've had earlier: One of the previous row value with condition

Not sure if I can use LAG() here, after combining tables.

Could you help me out, please?

Yaro
  • 19
  • 4

1 Answers1

1

Eventually, I've solved my problem exactly like here: One of the previous row value with condition

WITH
  listTable AS (
  SELECT
    *
  FROM (
    SELECT
      date,
      fullVisitorID,
      visitID,
      (
      SELECT
        value
      FROM
        UNNEST(hits.customdimensions)
      WHERE
        INDEX = 58
      GROUP BY
        value ) AS Test,
      hits.hitNumber,
      CASE
        WHEN product.isImpression = TRUE THEN "Impression"
        WHEN product.isClick = TRUE THEN "Click"
    END
      AS Action,
      productListName,
      product.productListPosition AS productListPosition,
      (
      SELECT
        value
      FROM
        UNNEST(product.customdimensions)
      WHERE
        INDEX = 68
      GROUP BY
        value ) AS Sponsored,
      (
      SELECT
        value
      FROM
        UNNEST(product.customdimensions)
      WHERE
        INDEX = 76
      GROUP BY
        value ) AS searchResultType,
      product.productSKU,
      product.productPrice / 1000000 AS Price,
      CONCAT(date, fullVisitorID, visitID, productSKU) AS listKey
    FROM
      `gap2-144306.73525272.ga_sessions_20230603`,
      UNNEST (hits) AS hits,
      UNNEST (hits.product) AS product
    WHERE
      productListName IN ( "searchpage",
        "categorypage",
        "dropdown_initial",
        "dropdown_results" )
      AND ( product.isImpression = TRUE
        OR product.isClick = TRUE )
      AND (
      SELECT
        value
      FROM
        UNNEST(`gap2-144306.73525272.ga_sessions_20230603`.customDimensions)
      WHERE
        INDEX = 12
      GROUP BY
        value ) NOT IN ("business",
        "robot",
        "wholesale"))
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC,
    productListPosition ASC ),


  ini_transTable AS (
  SELECT
    date,
    fullVisitorID,
    visitID,
    "NULL" AS Test,
    hits.hitNumber,
    CASE
      WHEN hits.eCommerceAction.action_type = "6" THEN "Purchase"
  END
    AS Action,
    "NULL" AS productListName,
    NULL AS productListPosition,
    "NULL" AS Sponsored,
    "NULL" AS searchResultType,
    product.productSKU,
    product.productPrice / 1000000 AS Price,
    CONCAT(date, fullVisitorID, visitID, productSKU) AS transKey
  FROM
    `gap2-144306.73525272.ga_sessions_20230603`,
    UNNEST (hits) AS hits,
    UNNEST (hits.product) AS product
  WHERE
    hits.eCommerceAction.action_type = "6"
    AND CONCAT(date, fullVisitorID, visitID, productSKU) IN (
    SELECT
      DISTINCT listKey
    FROM
      listTable
    WHERE
      Action = "Click" )
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC ),


  ini_uTable AS (
  SELECT
    *
  FROM
    listTable
  UNION ALL
  SELECT
    *
  FROM
    ini_transTable
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC),

    
  uTable AS (
  SELECT
    date,
    fullVisitorID,
    visitID,
  IF
    (Action = "Purchase", LAST_VALUE(
      IF
        (Action != "Purchase", Test, NULL) IGNORE NULLS) OVER (PARTITION BY listKey ORDER BY hitNumber ASC), Test) AS Test,
    hitNumber,
    Action,
  IF
    (Action = "Purchase", LAST_VALUE(
      IF
        (Action != "Purchase", productListName, NULL) IGNORE NULLS) OVER (PARTITION BY listKey ORDER BY hitNumber ASC), productListName) AS productListName,
  IF
    (Action = "Purchase", LAST_VALUE(
      IF
        (Action != "Purchase", productListPosition, NULL) IGNORE NULLS) OVER (PARTITION BY listKey ORDER BY hitNumber ASC), productListPosition) AS productListPosition,
  IF
    (Action = "Purchase", LAST_VALUE(
      IF
        (Action != "Purchase", Sponsored, NULL) IGNORE NULLS) OVER (PARTITION BY listKey ORDER BY hitNumber ASC), Sponsored) AS Sponsored,
  IF
    (Action = "Purchase", LAST_VALUE(
      IF
        (Action != "Purchase", searchResultType, NULL) IGNORE NULLS) OVER (PARTITION BY listKey ORDER BY hitNumber ASC), searchResultType) AS searchResultType,
    productSKU,
    Price,
    listKey
  FROM
    ini_uTable
  ORDER BY
    date,
    fullVisitorID ASC,
    visitID ASC,
    hitNumber ASC )

But I'm pretty sure there is a more elegant solution than my brute-forcing through the problem. I'll appreciate any suggestions.

Yaro
  • 19
  • 4