-1

I am extracting data from biqQuery using biqQuery API with python. Also Using the regular expression for filtering out the data.

regular expression: r".*https://my.magazine.com.*

Query with python as follows:

if __name__ == "__main__":
 
      start_date = "20230117"
      end_date = "20230117"
      url = re.sub(',', '', 'r".*https://my.magazine.com.*') # <-- how to declare it ???
      
      client = bigquery.Client()
      
      query = """
          SELECT
            event_date,
            COUNT(*) AS page_view,
            COUNT(DISTINCT user_pseudo_id) AS UU,
 
          FROM
            `analytics_111111.events_*`
 
          WHERE
            _TABLE_SUFFIX BETWEEN @start_date AND @end_date
            AND event_name IN ('page_view')
            AND REGEXP_CONTAINS((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), @url)
 
          GROUP BY 1;
 
      """
      job_config = bigquery.QueryJobConfig(
          query_parameters=[
                  bigquery.ScalarQueryParameter("start_date", "STRING", start_date),
                  bigquery.ScalarQueryParameter("end_date", "STRING", end_date),
                  bigquery.ScalarQueryParameter("url", "STRING", url),
          ]
      ) 
 
      query_job = client.query(query, job_config=job_config)  # Make an API request.
 
      df = query_job.to_dataframe()

I donot know how to pass exactly r".*https://my.magazine.com.* as a variable which is called url into the query and query_parameters. I want to pass it like end_date and start_date. Please help me. Thank you in advance.

M. ahmed
  • 53
  • 2
  • 11

2 Answers2

0

A quick workaround could be to format the string in python; like so:

if __name__ == "__main__":

  start_date = "20230117"
  end_date = "20230117"
  url = re.sub(',', '', 'r".*https://my.magazine.com.*') # <-- how to declare it ???
  
  client = bigquery.Client()
  
  query = f"""
      SELECT
        event_date,
        COUNT(*) AS page_view,
        COUNT(DISTINCT user_pseudo_id) AS UU,

      FROM
        `analytics_111111.events_*`

      WHERE
        _TABLE_SUFFIX BETWEEN @start_date AND @end_date
        AND event_name IN ('page_view')
        AND REGEXP_CONTAINS((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), {url})

      GROUP BY 1;

  """
  job_config = bigquery.QueryJobConfig(
      query_parameters=[
              bigquery.ScalarQueryParameter("start_date", "STRING", start_date),
              bigquery.ScalarQueryParameter("end_date", "STRING", end_date)
      ]
  ) 

  query_job = client.query(query, job_config=job_config)  # Make an API request.

  df = query_job.to_dataframe()

You will essentially get the same result instead of passing it as a QueryParameter.

0

Can you share the value of Query? Maybe you can try to replace the values in the Query directly using fstring instead of use job_config.

query = f"""
      SELECT
        event_date,
        COUNT(*) AS page_view,
        COUNT(DISTINCT user_pseudo_id) AS UU,

      FROM
        `analytics_111111.events_*`

      WHERE
        _TABLE_SUFFIX BETWEEN {start_date} AND {end_date}
        AND event_name IN ('page_view')
        AND REGEXP_CONTAINS((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), {url})

      GROUP BY 1;

  """