I have this code that creates a file "called" tar for all the files in a directory but it is really just a text file and not a tar file, What have I done wrong?
def get_matching_s3_keys(bucket, prefix='', suffix=''):
"""
Generate the keys in an S3 bucket.
:param bucket: Name of the S3 bucket.
:param prefix: Only fetch keys that start with this prefix (optional).
:param suffix: Only fetch keys that end with this suffix (optional).
"""
s3 = boto3.client('s3')
kwargs = {'Bucket': bucket}
# If the prefix is a single string (not a tuple of strings), we can
# do the filtering directly in the S3 API.
if isinstance(prefix, str):
kwargs['Prefix'] = prefix
while True:
# The S3 API response is a large blob of metadata.
# 'Contents' contains information about the listed objects.
resp = s3.list_objects_v2(**kwargs)
for obj in resp['Contents']:
key = obj['Key']
if key.startswith(prefix) and key.endswith(suffix):
yield key
# The S3 API is paginated, returning up to 1000 keys at a time.
# Pass the continuation token into the next response, until we
# reach the final page (when this field is missing).
try:
kwargs['ContinuationToken'] = resp['NextContinuationToken']
except KeyError:
break
def lambda_handler(event, context):
messages = ''
newDate = 20201020
agtBucket = 'agt-logs'
key = 'hourly_logs/'
to_key = 'daily_logs/'
my_bucket = s3object.Bucket(agtBucket)
files_list = []
tar = tarfile.open('/tmp/' + newDate + '.tar', 'w')
source_dir="/tmp/"
for fname in get_matching_s3_keys(bucket=agtBucket, prefix=key, suffix='.log'):
print(fname)
file_obj = s3object.Object(agtBucket, fname)
#file_content = file_obj.get()['Body'].read()
#tar.add(file_content)
s3object.Bucket(agtBucket).download_file(fname, '/tmp/'+fname[12])
tar.add(source_dir, arcname=os.path.basename(source_dir))
s3object.Object(agtBucket,fname).delete()
tar.close()
s3object.meta.client.upload_file(source_dir + newDate + '.tar', agtBucket, to_key + 's3Download_' + newDate + '.tar')
Here is am example of what is in a log file
11:00:00.55 - Create the list of files to be downloaded
11:00:02.48 - Read through and process each line
11:00:02.48 - Downloading 8492893947 bytes for rd_dnsdb_dns.20201021.1300.H.mtbl
11:11:52.91 - Moving rd_dnsdb_dns.20201021.1300.H.mtbl to copied
11:12:29.71 - Finished downloading and moving files
-
and here is what the "tar" file looks like
././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1603111585.722301
/ 0000700 0001742 0001737 00000000000 00000000000 011374 5 ustar 00sbx_user1051 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1603111585.722301
s 0000664 0001742 0001737 00000000471 00000000000 011520 0 ustar 00sbx_user1051 0000000 0000000
10:00:00.68 - Create the list of files to be downloaded
10:00:02.57 - Read through and process each line
10:00:02.57 - Downloading 5917288822 bytes for rd_dnsdb_dns.20201004.0800.H.mtbl
10:08:11.29 - Moving rd_dnsdb_dns.20201004.0800.H.mtbl to copied
10:08:36.38 - Finished downloading and moving files
-
././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1603111585.910301
/ 0000700 0001742 0001737 00000000000 00000000000 011374 5 ustar 00sbx_user1051 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1603111585.890301
s 0000664 0001742 0001737 00000000471 00000000000 011520 0 ustar 00sbx_user1051 0000000 0000000
11:00:00.61 - Create the list of files to be downloaded
11:00:02.35 - Read through and process each line
11:00:02.35 - Downloading 5691935935 bytes for rd_dnsdb_dns.20201004.0900.H.mtbl
11:07:45.06 - Moving rd_dnsdb_dns.20201004.0900.H.mtbl to copied
11:08:26.80 - Finished downloading and moving files
-
././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1603111586.050301