I am testing Google Cloud Functions. The objective is to stream more than 10MB of data into a BigQuery table. I divide the data into chunks and then stream each chunk to BigQuery. The function has 2GB of memory allocated, but it does not work.
from bigquery_uploader.datauploader import Datauploader, bq_streaming_cloud_function_table_schema
import sys


def upload_bigquery_chunks(data_list, bigquery_extractor, bq_table_id, chunk_mb=4):
    def getChunks(data_list, n=20):
        for i in range(0, len(data_list), n):
            yield data_list[i: i + n]

    avg_size_dd = sys.getsizeof(data_list) / len(data_list)
    num_chunks = int(chunk_mb * (10 ** 6) / avg_size_dd)
    data_chunks = getChunks(data_list, num_chunks)
    for data in data_chunks:
        print(sys.getsizeof(data))
        bigquery_extractor.client.insert_rows_json(bq_table_id, data)


def bq_data_uploader_stream(request):
    bq_project_id = 'my_project_id'
    bq_dataset = 'my_dataset'
    bq_table_id = "bq_streaming_cloud_function_table"
    data_list = list()
    # 1MB = 1 * (10**6)
    MBs = 10
    while sys.getsizeof(data_list) < MBs * (10 ** 6):
        data_list.append({'campaign_name': 'Google Cloud Function', 'campaign_id': 123456789})
    bq_extractor = Datauploader(bq_project_id)
    bq_extractor.create_table(bq_dataset, bq_table_id, bq_streaming_cloud_function_table_schema)
    bq_table_id = '{0}.{1}.{2}'.format(bq_project_id, bq_dataset, bq_table_id)
    upload_bigquery_chunks(data_list, bq_extractor, bq_table_id)
I get the following error:
Traceback (most recent call last):
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 346, in run_http_function
    result = _function_handler.invoke_user_function(flask.request)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
    return call_user_function(request_or_event)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 210, in call_user_function
    return self._user_function(request_or_event)
  File "/user_code/main.py", line 34, in bq_data_uploader_stream
    upload_bigquery_chunks(data_list, bq_extractor, bq_table_id)
  File "/user_code/main.py", line 16, in upload_bigquery_chunks
    bigquery_extractor.client.insert_rows_json(bq_table_id, data)
  File "/env/local/lib/python3.7/site-packages/google/cloud/bigquery/client.py", line 2269, in insert_rows_json
    retry, method="POST", path="%s/insertAll" % table.path, data=data
  File "/env/local/lib/python3.7/site-packages/google/cloud/bigquery/client.py", line 476, in _call_api
    return call()
  File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 277, in retry_wrapped_func
    on_error=on_error,
  File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 182, in retry_target
    return target()
  File "/env/local/lib/python3.7/site-packages/google/cloud/_http.py", line 393, in api_request
    raise exceptions.from_http_response(response)
google.api_core.exceptions.BadRequest: 400 POST https://bigquery.googleapis.com/bigquery/v2/projects/my_project_id/datasets/my_dataset/tables/bq_streaming_cloud_function_table/insertAll: Request payload size exceeds the limit: 10485760 bytes.
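For reference, sys.getsizeof(data_list) only measures the list object itself (its internal pointer array), not the dictionaries it holds, so the chunk size estimate above can be far smaller than the JSON body that insert_rows_json actually sends to the insertAll endpoint, which is capped at 10485760 bytes per request. Below is a minimal sketch of how one might size chunks by their serialized payload instead; the function names split_by_payload and payload_size_bytes and the 9 MB safety margin are my own assumptions, not part of the original code.

import json

def payload_size_bytes(rows):
    # Approximate the insertAll request body size by serializing the rows to JSON.
    return len(json.dumps(rows).encode('utf-8'))

def split_by_payload(rows, max_bytes=9 * (10 ** 6)):
    # Yield chunks whose serialized size stays safely under the 10 MB insertAll limit.
    chunk = []
    for row in rows:
        chunk.append(row)
        if len(chunk) > 1 and payload_size_bytes(chunk) >= max_bytes:
            # Move the last row into a fresh chunk so the current one stays under the limit.
            chunk.pop()
            yield chunk
            chunk = [row]
    if chunk:
        yield chunk

Each yielded chunk could then be passed to insert_rows_json in place of the chunks produced from the sys.getsizeof estimate.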