I'm posting this because I ran into a similar issue: I had more than 100 databases, and more than 100 tables in a single database, so I needed to follow the pagination tokens. The script below is hacked together and a bit more verbose than necessary, but it should be easy to modify if you need to.
import boto3
import pdb
import logging
# Write log records at INFO level and above to 'databases_tables.log'
# (a relative path, so the file lands in the current working directory).
logging.basicConfig(filename='databases_tables.log', level=logging.INFO)
def get_glue_databases() -> list:
    """
    Return every database visible to the Glue client as a list of dicts.

    The Glue ``get_databases`` call returns results one page at a time, so
    the boto3 paginator is used to follow the ``NextToken`` chain until the
    service reports no further pages. This avoids sending an empty-string
    token on the first request and cannot loop forever on a falsy token.

    Each database is a dict, like:
    {
        'Name': 'analytics_raw_development',
        'CreateTime': datetime.datetime(2022, 4, 22, 13, 19, 49, tzinfo=tzlocal()),
        'CreateTableDefaultPermissions': [{'Principal': {'DataLakePrincipalIdentifier': 'IAM_ALLOWED_PRINCIPALS'}, 'Permissions': ['ALL']}],
        'CatalogId': '999999999'
    }
    """
    glue_client = boto3.client('glue')
    paginator = glue_client.get_paginator('get_databases')

    databases = []
    for page in paginator.paginate():
        # Each page carries its batch of databases under 'DatabaseList'.
        databases.extend(page['DatabaseList'])
    return databases
def get_glue_tables(databases) -> list:
    """
    Return every table from each of the given Glue databases.

    Args:
        databases: Iterable of database dicts (the shape produced by
            ``get_glue_databases``). Each dict must contain the keys
            'Name' and 'CatalogId', which are passed to ``get_tables``.

    Returns:
        A single flat list containing the table dicts of all databases,
        in the order the service returned them.

    The Glue ``get_tables`` call is paginated; the boto3 paginator follows
    the ``NextToken`` chain for each database, so no empty-string token is
    sent on the first request. Every database and every table is logged
    at INFO level.
    """
    glue_client = boto3.client('glue')
    # The paginator is independent of the per-database arguments, so it is
    # created once and reused for every database.
    paginator = glue_client.get_paginator('get_tables')

    tables = []
    for database in databases:
        logging.info(f"Database: {database}")
        pages = paginator.paginate(
            DatabaseName=database['Name'],
            CatalogId=database['CatalogId'],
        )
        for page in pages:
            for table in page['TableList']:
                logging.info(f"Adding table: {table}")
                tables.append(table)
    return tables
def main() -> None:
    """
    Script entry point: fetch all Glue databases, then all of their tables.

    Progress and the full results are written to the log; "Finished" is
    printed to stdout once both steps have completed.
    """
    logging.info("Getting Databases")
    databases = get_glue_databases()  # list of database dicts
    logging.info(f'Databases: {databases}')

    logging.info("Getting All Tables")
    tables = get_glue_tables(databases)
    logging.info(f"Tables: {tables}")

    print("Finished")


# Run only when executed as a script, so importing this module has no
# side effects beyond the definitions and the logging setup.
if __name__ == '__main__':
    main()