I have the sync script which is running & working well, but i see some download files takes time, thought of using async approach here.
import json
import os
import io
import time
import gzip
import re
import logging
from logging.handlers import RotatingFileHandler
import boto3
AWS_KEY = "**"
AWS_SECRET = "**"
QUEUE_URL = "***"
OUTPUT_PATH = "./test"
VISIBILITY_TIMEOUT = 10
REGION_NAME = "region"
sqs = boto3.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
s3 = boto3.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL)
def handle_response(msg, path):
"""Logic goes here"""
print('message: %s' % msg)
def download_message_files(msg):
for s3_file in msg['files']:
s3_path = s3_file['path']
with io.BytesIO() as f:
s3.download_fileobj(msg['bucket'], s3_path, f)
f.seek(0)
for line in gzip.GzipFile(fileobj=f):
await handle_response(line.decode('UTF-8'), s3_path)
def consume():
while True:
for msg in queue.receive_messages(VisibilityTimeout=VISIBILITY_TIMEOUT):
body = json.loads(msg.body) # grab the actual message body
download_message_files(body)
msg.delete()
time.sleep(sleep_time)
if __name__ == '__main__':
# Setup our root logger
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
# Create our FDR logger
logger = logging.getLogger("Consumer")
# Rotate log file handler
RFH = RotatingFileHandler("test.log", maxBytes=20971520, backupCount=5)
# Log file output format
F_FORMAT = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
# Set the log file output level to INFO
RFH.setLevel(logging.INFO)
# Add our log file formatter to the log file handler
RFH.setFormatter(F_FORMAT)
# Add our log file handler to our logger
logger.addHandler(RFH)
consume()
I have tried converting this using aioboto3 and got struck in queue approach.
session = aioboto3.Session()
sqs = session.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
s3 = session.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL) <---- this gives error as 'ResourceCreatorContext' object has no attribute 'Queue'
As i could understand from this there is no attribute, but could anyone guide me to make this working with async nature.