I'm trying to load a pickled pandas dataframe from Google Cloud Storage into App Engine.
I have been using blob.download_to_file() to read the bytestream into pandas; however, I encounter the following error:
UnpicklingError: invalid load key, m
I have tried seeking to the beginning of the stream, to no avail, and I am pretty sure something fundamental is missing from my understanding.
When attempting to pass an open file object and read from there, I get an
UnsupportedOperation: write
error
from io import BytesIO
from google.cloud import storage
def get_byte_fileobj(project, bucket, path) -> BytesIO:
    """Download a blob into an in-memory buffer and return the buffer.

    Args:
        project: Project identifier forwarded to ``_get_blob``.
        bucket: Bucket name forwarded to ``_get_blob``.
        path: Object path inside the bucket, forwarded to ``_get_blob``.

    Returns:
        A ``BytesIO`` holding whatever ``download_to_file`` wrote, with the
        stream position reset to 0 so callers read from the start.
    """
    buffer = BytesIO()
    _get_blob(bucket, path, project).download_to_file(buffer)
    # The download leaves the cursor at the end of the data; rewind it.
    buffer.seek(0)
    return buffer
def _get_blob(bucket_name, path, project):
    """Build a storage client and return a blob handle for ``path``.

    Args:
        bucket_name: Name passed to ``storage.Client.get_bucket``.
        path: Object name passed to ``bucket.blob``.
        project: Project passed to ``storage.Client``.

    Returns:
        The object produced by ``bucket.blob(path)``.

    Note:
        Reads the module-level ``service_account_credentials_path``; when it
        is falsy, ``credentials=None`` is passed to the client.
    """
    if service_account_credentials_path:
        credentials = service_account.Credentials.from_service_account_file(
            service_account_credentials_path
        )
    else:
        credentials = None

    client = storage.Client(project=project, credentials=credentials)
    return client.get_bucket(bucket_name).blob(path)
# Fetch the pickled object from the 'backups' bucket into an in-memory buffer.
fileobj = get_byte_fileobj(
    project=projectid,
    bucket='backups',
    path='Matches/Matches.pickle',
)
# NOTE(review): whether pd.read_pickle accepts a file-like object (rather than
# only a path) appears to depend on the pandas version — confirm against the
# installed release; pickle.load(fileobj) is a possible alternative.
pd.read_pickle(fileobj)
Ideally, pandas would read the pickle directly, since all of my GCS backups are in that format, but I'm open to suggestions.