I have created a custom Scrapy pipeline for Wasabi S3. It works — the JSON file gets uploaded — but I still get this error: botocore.exceptions.ClientError: An error occurred (InvalidAccessKeyId) when calling the PutObject operation: The AWS Access Key Id you provided does not exist in our records.
I am providing a valid access key: uploading a random JSON file from the terminal with boto3 also worked without any error. I have also created policies with WasabiFullAccess
and AmazonS3FullAccess.
My pipeline:
from scrapy import signals
from scrapy.exporters import JsonItemExporter
import boto3
class JsonWriterPipeline:
    """Export scraped items to a local JSON file and upload it to Wasabi S3.

    On ``spider_opened`` a file named ``<spider.name>_items.json`` is created
    and every item passed through ``process_item`` is appended to it.  On
    ``spider_closed`` the file is finalised, closed, and uploaded to the
    configured bucket under the key ``<spider.name>``.

    Connection details are read from the crawler settings in
    ``from_crawler``:

    * ``WASABI_BUCKET_NAME`` -- target bucket (required for the upload).
    * ``WASABI_ENDPOINT_URL`` -- S3 endpoint; defaults to
      ``DEFAULT_ENDPOINT_URL``.
    * ``WASABI_ACCESS_KEY_ID`` / ``WASABI_SECRET_ACCESS_KEY`` -- credentials.
      When left unset, ``None`` is passed to boto3, which then falls back to
      its own credential lookup.

    NOTE: this pipeline uploads on its own.  Scrapy's feed-export extension
    (the ``FEEDS`` setting with an ``s3://`` URI) is a separate component
    with its own S3 client; errors logged by ``scrapy.extensions.feedexport``
    do not come from this class.
    """

    DEFAULT_ENDPOINT_URL = "https://s3.ap-northeast-1.wasabisys.com"

    def __init__(
        self,
        bucket_name=None,
        endpoint_url=DEFAULT_ENDPOINT_URL,
        access_key_id=None,
        secret_access_key=None,
    ):
        """Store the upload configuration; no I/O happens here.

        All parameters are optional so that ``JsonWriterPipeline()`` keeps
        working; without ``bucket_name`` the upload step is skipped.
        """
        # Not used by this class; retained in case external code reads it.
        self.items_list = []
        self.bucket_name = bucket_name
        self.endpoint_url = endpoint_url
        self.access_key_id = access_key_id
        self.secret_access_key = secret_access_key
        # Set in spider_opened.
        self.file = None
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from crawler settings and hook spider signals."""
        settings = crawler.settings
        pipeline = cls(
            bucket_name=settings.get("WASABI_BUCKET_NAME"),
            endpoint_url=settings.get(
                "WASABI_ENDPOINT_URL", cls.DEFAULT_ENDPOINT_URL
            ),
            access_key_id=settings.get("WASABI_ACCESS_KEY_ID"),
            secret_access_key=settings.get("WASABI_SECRET_ACCESS_KEY"),
        )
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    @staticmethod
    def _local_path(spider_name):
        """Return the local JSON file name used for *spider_name*."""
        return f"{spider_name}_items.json"

    def spider_opened(self, spider):
        """Open the local output file and start the JSON export."""
        self.file = open(self._local_path(spider.name), "wb")
        # The encoding is passed to the constructor because the exporter
        # configures its JSON encoder there; assigning the attribute
        # afterwards is too late to influence it.
        self.exporter = JsonItemExporter(self.file, encoding="utf-8")
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Write *item* to the local JSON file and pass it on unchanged."""
        self.exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finalise the local file and upload it to the configured bucket."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Close the handle even if finishing the export fails.
            self.file.close()

        if not self.bucket_name:
            # The local file is complete; only the upload is skipped.
            spider.logger.error(
                "WASABI_BUCKET_NAME is not configured; skipping upload of %s",
                self._local_path(spider.name),
            )
            return

        s3 = boto3.resource(
            "s3",
            endpoint_url=self.endpoint_url,
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
        )
        bucket = s3.Bucket(self.bucket_name)
        bucket.upload_file(self._local_path(spider.name), f"{spider.name}")
Here is the traceback error from scrapy logs:
2022-10-31 03:50:18 [scrapy.extensions.feedexport] ERROR: Error storing json feed (120 items) in: s3://brownfashions/brown_fashion_json/brown_fashion_json_2022-10-30T22-50-00.json
Traceback (most recent call last):
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/twisted/python/threadpool.py", line 244, in inContext
result = inContext.theWork() # type: ignore[attr-defined]
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/twisted/python/threadpool.py", line 260, in <lambda>
inContext.theWork = lambda: context.call( # type: ignore[attr-defined]
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/twisted/python/context.py", line 117, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/twisted/python/context.py", line 82, in callWithContext
return func(*args, **kw)
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/scrapy/extensions/feedexport.py", line 196, in _store_in_thread
self.s3_client.put_object(
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/botocore/client.py", line 507, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/home/zerox/BrownFashions/venv/lib/python3.9/site-packages/botocore/client.py", line 943, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (InvalidAccessKeyId) when calling the PutObject operation: The AWS Access Key Id you provided does not exist in our records.
Does anyone know why I am getting this error and how to overcome it? Thanks!