I have an S3 bucket that stores PDFs. We need to conditionally apply a watermark to each PDF, and we chose an S3 Object Lambda access point to do this. If we save the watermarked file back to S3, it works fine, but when we return it dynamically via write_get_object_response, the file is corrupted. Here is the code:
import boto3
import json
import os
import logging
from io import BytesIO
from urllib import request
from urllib.parse import urlparse
import PyPDF4
# Module-level logger shared by the functions in this file. It writes through
# its own StreamHandler.
logger = logging.getLogger('S3-img-processing')
logger.addHandler(logging.StreamHandler())

# The level name comes from the LOG_LEVEL environment variable (default INFO).
# It is normalised and validated by setLevel() so that a lowercase, empty or
# misspelled value falls back to INFO instead of raising while the module is
# being imported.
_requested_level = os.getenv('LOG_LEVEL', 'INFO').strip().upper()
try:
    logger.setLevel(_requested_level)
except ValueError:
    logger.setLevel(logging.INFO)
    logger.warning('Unknown LOG_LEVEL %r; falling back to INFO', _requested_level)
def apply_watermark_to_pdf(inputStream, watermarkStream):
    """Stamp the first page of a watermark PDF onto every page of a PDF.

    Args:
        inputStream: Binary stream holding the PDF to be watermarked.
        watermarkStream: Binary stream holding the watermark PDF; only its
            first page is used.

    Returns:
        A ``BytesIO`` holding the watermarked PDF, positioned at offset 0.
    """
    stamp_reader = PyPDF4.PdfFileReader(watermarkStream)
    stamp = stamp_reader.getPage(0)
    source = PyPDF4.PdfFileReader(inputStream)
    writer = PyPDF4.PdfFileWriter()

    # map() is lazy, so each page is fetched, stamped and queued before the
    # next one is fetched.
    for current_page in map(source.getPage, range(source.getNumPages())):
        current_page.mergePage(stamp)
        writer.addPage(current_page)

    # Serialise the assembled document into memory and rewind the buffer so
    # the caller can read it from the beginning.
    buffer = BytesIO()
    writer.write(buffer)
    buffer.seek(0)
    return buffer
def handler(event, context) -> dict:
    """Answer an S3 Object Lambda GetObject request with a watermarked PDF.

    Downloads the requested object through the presigned URL carried in the
    event, downloads the watermark PDF, stamps the watermark onto every page
    and passes the result to ``WriteGetObjectResponse``. When a download
    fails with an HTTP error, that failure is forwarded to the caller as an
    error response instead of returning without responding.

    Args:
        event: Invocation event; ``event["getObjectContext"]`` must provide
            ``inputS3Url``, ``outputRoute`` and ``outputToken``.
        context: Lambda context object (unused).

    Returns:
        ``{'status_code': 200}`` on success, or ``{'status_code': <code>}``
        carrying the upstream HTTP status when a download fails.
    """
    logger.debug(json.dumps(event))
    object_context = event["getObjectContext"]
    # Presigned URL for the original object the caller asked for.
    s3_url = object_context["inputS3Url"]
    watermark_url = r'Presignedurl of watermark pdf'
    # Route and token identify the pending GetObject request being answered.
    request_route = object_context["outputRoute"]
    request_token = object_context["outputToken"]

    s3 = boto3.client('s3')
    # NOTE(review): the AWS_EXECUTION_ENV check presumably exists so that runs
    # outside Lambda skip the WriteGetObjectResponse call -- confirm.
    respond_to_caller = bool(os.getenv('AWS_EXECUTION_ENV'))

    inputReq = request.Request(s3_url)
    watermarkReq = request.Request(watermark_url)
    try:
        # The context managers close each HTTP response as soon as its body
        # has been read, including when the second download fails.
        with request.urlopen(inputReq) as inputResponse:
            input_pdf = inputResponse.read()
        with request.urlopen(watermarkReq) as watermarkResponse:
            watermark_pdf = watermarkResponse.read()
    except request.HTTPError as e:
        logger.info(f'Error downloading the object. Error code: {e.code}')
        logger.exception(e.read())
        if respond_to_caller:
            # Error statuses documented for WriteGetObjectResponse; any other
            # upstream status is reported to the caller as 500.
            forwardable = (400, 401, 403, 404, 405, 409, 411, 412, 416, 500, 503)
            s3.write_get_object_response(
                RequestRoute=request_route,
                RequestToken=request_token,
                StatusCode=e.code if e.code in forwardable else 500,
                ErrorCode='SourceDownloadFailed',
                # The presigned URLs are deliberately kept out of the message.
                ErrorMessage=f'Downloading a source PDF failed with HTTP status {e.code}.')
        return {'status_code': e.code}

    # Apply the watermark to the downloaded PDF.
    transformed_object = apply_watermark_to_pdf(BytesIO(input_pdf), BytesIO(watermark_pdf))

    # Send the transformed data back through S3 Object Lambda.
    if respond_to_caller:
        s3.write_get_object_response(
            Body=transformed_object,
            # The body is always the PDF written by apply_watermark_to_pdf.
            ContentType='application/pdf',
            RequestRoute=request_route,
            RequestToken=request_token)
    return {'status_code': 200}