# This is the script that does what Theo recommended.
import json
import logging
import awswrangler as wr
import boto3
from botocore.exceptions import ClientError
# Configure the root logger once at import time: INFO and above, using the
# standard library's default record layout.
logging.basicConfig(
    format=logging.BASIC_FORMAT,
    level=logging.INFO,
)

# No name is passed, so this is the root logger itself.
logger = logging.getLogger()
def delete_partitions(database_name: str, table_name: str, *, page_size: int = 20) -> int:
    """Delete the partitions of a Glue Data Catalog table, page by page.

    Partitions are listed with the Glue ``get_partitions`` paginator and every
    non-empty page is removed with a single ``batch_delete_partition`` call.

    Args:
        database_name: Glue database that contains the table.
        table_name: Table whose partitions are removed.
        page_size: Number of partitions requested per page, and therefore sent
            in each delete request. Must be between 1 and 25, because
            ``batch_delete_partition`` accepts at most 25 partitions per call.

    Returns:
        Number of partitions sent for deletion for which Glue reported no
        error.

    Raises:
        ValueError: If ``page_size`` is outside the range 1..25.
    """
    # Validate before creating any client so a bad argument fails fast and
    # without touching AWS.
    if not 1 <= page_size <= 25:
        raise ValueError(f'page_size must be between 1 and 25, got {page_size}')

    client = boto3.client('glue')
    paginator = client.get_paginator('get_partitions')
    pages = paginator.paginate(DatabaseName=database_name, TableName=table_name, MaxResults=page_size)

    # NOTE(review): deletion happens while the listing is still being
    # paginated; confirm that Glue's NextToken tolerates concurrent deletes.
    page_count = 0
    found_count = 0
    deleted_count = 0
    for page_count, page in enumerate(pages, start=1):
        partitions_to_delete = [{'Values': partition['Values']} for partition in page['Partitions']]
        if not partitions_to_delete:
            # An empty page has nothing to delete; keep consuming the paginator.
            continue

        found_count += len(partitions_to_delete)
        for entry in partitions_to_delete:
            logger.info('Found partition %s', entry['Values'])

        response = client.batch_delete_partition(DatabaseName=database_name, TableName=table_name,
                                                 PartitionsToDelete=partitions_to_delete)

        # The call can succeed as a whole while individual partitions fail;
        # those failures are reported in the response rather than raised.
        errors = response.get('Errors') or []
        for error in errors:
            logger.warning('Failed to delete partition %s: %s',
                           error.get('PartitionValues'), error.get('ErrorDetail'))
        deleted_count += len(partitions_to_delete) - len(errors)
        logger.info('Deleted partitions with response: %s', response)

    # Emitted exactly once, after the last page, with the totals gathered above.
    logger.info('Done with all partitions: %d found, %d deleted across %d page(s)',
                found_count, deleted_count, page_count)
    return deleted_count
def repair_table(database_name: str, table_name: str) -> None:
    """Run ``MSCK REPAIR TABLE`` on a table through Athena and wait for it.

    The statement is submitted with ``start_query_execution``; the function
    then blocks in ``wr.athena.wait_query`` and logs the resulting
    query-execution record.

    Args:
        database_name: Database used as the Athena query execution context.
        table_name: Table name, concatenated verbatim into the statement.

    A ``ClientError`` raised while submitting the query is logged and
    swallowed. Anything raised by ``wr.athena.wait_query`` propagates to the
    caller.
    """
    client = boto3.client('athena')
    try:
        response = client.start_query_execution(
            QueryString='MSCK REPAIR TABLE ' + table_name + ';',
            QueryExecutionContext={'Database': database_name},
        )
    except ClientError as err:
        # Submission failed: report it as an error, not as routine info.
        logger.error(err.response['Error']['Message'])
        return

    res = wr.athena.wait_query(query_execution_id=response['QueryExecutionId'])
    # default=str: the query-execution record carries datetime timestamps,
    # which json cannot encode natively; without it the success path raises
    # TypeError after the query has already finished.
    logger.info('Query succeeded: %s', json.dumps(res, indent=2, default=str))
if __name__ == '__main__':
    # NOTE(review): these look like placeholder identifiers; confirm and
    # replace them with the real Glue database and table before running.
    target = {
        'database_name': 'database_name',
        'table_name': 'table_name',
    }

    # Drop the catalog partitions first, then run the Athena repair.
    delete_partitions(**target)
    repair_table(**target)