I want to implement an AWS Lambda function that executes the following Python script:
    import os
    import json
    import pandas as pd

    directory = os.fsencode(directory_in_string)

    def transform_csv(csv):
        for file in os.listdir(directory):
            filename = os.fsdecode(file)
            # Read each JSON file from the local directory
            d = open(r'C:\Users\r.reibold\Documents\GitHub\groovy_dynamodb_api\historische_wetterdaten\{}'.format(filename))
            data = json.load(d)
            df_historical = pd.json_normalize(data)
            # Convert epoch seconds to a datetime, then reformat
            df_historical["dt"] = pd.to_datetime(df_historical["dt"], unit='s', errors='coerce').dt.strftime("%m/%d/%Y %H:%M:%S")
            df_historical["dt"] = pd.to_datetime(df_historical["dt"])
            ...
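For reference, this is roughly what the normalization and datetime steps produce on a couple of made-up weather records (the field names other than dt are hypothetical):

    import pandas as pd

    sample = [{"dt": 1609459200, "main": {"temp": 271.3}},
              {"dt": 1609462800, "main": {"temp": 272.1}}]
    df = pd.json_normalize(sample)  # nested keys become dotted columns, e.g. "main.temp"
    df["dt"] = pd.to_datetime(df["dt"], unit='s', errors='coerce')
    print(df)
    #                    dt  main.temp
    # 0 2021-01-01 00:00:00      271.3
    # 1 2021-01-01 01:00:00      272.1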
My question now is: how do I have to change the os.* calls, given that I need to reference the S3 bucket instead of my local directory?
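From the boto3 docs I gather that a single object is fetched with get_object instead of open(); a minimal sketch of what I mean (the key below is a hypothetical example):

    import json
    import boto3

    s3 = boto3.client('s3')
    # S3 objects are not files; get_object returns the body as a stream
    response = s3.get_object(
        Bucket='weatherdata-templates',
        Key='historische_wetterdaten/New/example.json',  # hypothetical key
    )
    data = json.loads(response['Body'].read())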
My first attempt at the full loop looks like this:
    DIRECTORY = 's3://weatherdata-templates/historische_wetterdaten/New/'
    BUCKET = 'weatherdata-templates'

    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=BUCKET, Prefix=DIRECTORY)

    def lambda_handler(event, context):
        for page in pages:
            for obj in page['Contents']:
                filename = s3.fsdecode(obj)  # unsure: is there an S3 equivalent of os.fsdecode?
                d = open(r's3://102135091842-weatherdata-templates/historische_wetterdaten/New/{}'.format(filename))
                data = json.load(d)
                df_historical = pd.json_normalize(data)
                ...
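Reading around, I suspect the listing gives me each key directly via obj['Key'], that Prefix has to be the key path without the s3:// part, and that open() cannot read S3 URLs at all, so the loop would become something like this (untested sketch):

    import json
    import boto3
    import pandas as pd

    BUCKET = 'weatherdata-templates'
    PREFIX = 'historische_wetterdaten/New/'  # key prefix, not an s3:// URL

    s3 = boto3.client('s3')

    def lambda_handler(event, context):
        paginator = s3.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=BUCKET, Prefix=PREFIX):
            for obj in page.get('Contents', []):
                key = obj['Key']  # e.g. 'historische_wetterdaten/New/file.json'
                body = s3.get_object(Bucket=BUCKET, Key=key)['Body'].read()
                data = json.loads(body)
                df_historical = pd.json_normalize(data)
                # ... same datetime transformation as in the local script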
Am I on the right track, or completely wrong? Thanks.