I am trying to read files in my buckets that are a mix of csv/xlsx, and I am getting a 403, which I do not quite understand since I am setting AWS creds through the keychain and env vars. I am using a URL over https; when I switch the URL to s3:// it tells me the bucket doesn't exist, which it definitely does. I have s3fs installed as well.
TLDR: Https throws 403s, s3:// throws bucket doesn't exist when it does.
Code:
def get_file(project_name, uid) -> list:
    """Return s3:// URIs for every object under ``<project_name>/raw_datasets``.

    Args:
        project_name: Top-level key prefix within the bucket.
        uid: Suffix of the bucket name (``stackstr-<uid>``).

    Returns:
        List of ``s3://bucket/key`` URI strings, one per matching object.

    Why s3:// and not https://: a plain https URL is fetched anonymously by
    pandas, so a private bucket answers 403. An ``s3://bucket/key`` URI is
    handled by s3fs, which signs the request using the same credential chain
    boto3 uses (env vars, keychain, ~/.aws). Note the URI must be
    ``s3://<bucket>/<key>`` — including the ``.s3.<region>.amazonaws.com``
    virtual-host suffix makes s3fs treat the whole hostname as the bucket
    name, which is why that form reports "bucket doesn't exist".
    """
    s3 = boto3.resource('s3', region_name='us-east-2')
    bucket_name = 'stackstr-' + uid
    bucket = s3.Bucket(bucket_name)
    files = []
    for obj in bucket.objects.filter(Prefix=project_name + '/raw_datasets'):
        files.append('s3://' + bucket_name + '/' + obj.key)
    print(files)
    return files
def generate_dataframes(files) -> pd.DataFrame:
    """Read each csv/xlsx path in *files* and return one concatenated DataFrame.

    Args:
        files: Iterable of file paths or URLs (local, https, or s3://) ending
            in ``.csv`` or ``.xlsx``; other extensions are silently skipped.

    Returns:
        A single DataFrame of all files stacked row-wise (index reset).
        Empty DataFrame when *files* is empty or contains no readable types.

    Fixes vs. original: the function was annotated ``-> pd.DataFrame`` but had
    no return statement (always returned None); the two ``if`` tests are now
    ``if/elif``; the extension check is case-insensitive (``.CSV`` etc.).
    """
    df_list = []
    for fname in files:
        # rsplit with maxsplit=1 takes only the final extension;
        # lower() accepts .CSV / .Xlsx as well.
        ext = fname.rsplit('.', 1)[-1].lower()
        if ext == 'xlsx':
            df_list.append(pd.read_excel(fname))
        elif ext == 'csv':
            df_list.append(pd.read_csv(fname))
    print(df_list)
    if not df_list:
        return pd.DataFrame()
    return pd.concat(df_list, ignore_index=True)