Specific to invoices: unfortunately, to this day there is still no native way to download them. The only options are downloading them manually or, if you are one of the lucky ones who receives them, dealing with all of them via email: https://aws.amazon.com/premiumsupport/knowledge-center/download-pdf-invoice/
There is https://github.com/iann0036/aws-bill-export, which does not use a native API but instead scrapes the web page; it is set up via Lambda and Node.js and requires Puppeteer, among other dependencies.
I just finished writing some Python + Selenium code that is far more "monstrous" but gets the job done (at least for today's UI, as of Jan. 2023)...
I thought I'd share both of those since you mentioned them in the OP and no other solutions have come up.
import os
import sys
import time
import argparse
from os.path import expanduser
from datetime import datetime
from dateutil.relativedelta import relativedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Home directory of the current user; used to locate ~/Downloads in interactive mode.
home = expanduser("~")


def _parse_interactive_flag(value):
    """Convert the raw ``-i/--interactive`` command-line value into a bool.

    argparse hands option values over as strings, so ``-i False`` used to
    arrive as the (truthy) string ``"False"`` and could never select headless
    mode. Explicit "false" spellings now map to ``False``; any other value
    keeps meaning "interactive", as before.

    Args:
        value: The raw option value (a string from argparse, or a bool).

    Returns:
        ``False`` for an explicit false spelling, ``True`` otherwise.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() not in ('0', 'false', 'f', 'no', 'n', 'off')


# Variables grabbed from CLI arguments
parser = argparse.ArgumentParser(
    description='AWS Console Login, programming the unprogrammatically-accessible (via CLI/API, using selenium instead).')
parser.add_argument(
    '-i', '--interactive',
    help="Use False for Headless mode",
    type=_parse_interactive_flag,
    default=False,
    required=False
)
# NOTE: parsing happens at import time, so this module is meant to be run as a script.
args = parser.parse_args()
# ChromeDriver options
options = webdriver.ChromeOptions()
if not args.interactive:
    # Headless run: download into a dedicated folder next to the working directory.
    options.add_argument('--headless')
    # Chrome is expected to want an absolute path for download.default_directory;
    # a relative one may be ignored, which would also make download_wait() watch
    # the wrong folder.
    download_directory = os.path.abspath("./aws_invoice_downloads")
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(download_directory, exist_ok=True)
else:
    # Interactive run: use the user's regular Downloads folder.
    download_directory = os.path.join(home, "Downloads")
options.add_argument("--window-size=1920x1080")
options.add_argument("--remote-debugging-port=9222")
options.add_argument('--no-sandbox')
options.add_argument("--disable-gpu")
options.add_argument('--disable-dev-shm-usage')
options.add_experimental_option("prefs", {
    "download.default_directory": download_directory,
    "download.prompt_for_download": False
})
# Initiate ChromeDriver
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object; kept as-is because the installed Selenium version is not
# visible from this file.
driver = webdriver.Chrome(executable_path='chromedriver', options=options)
# create action chain object (shared by every click performed below)
action = ActionChains(driver)
# Set the default selenium timeout
delay = 30  # seconds
# Abort function
def abort_function():
    """Shut the browser down and terminate the script with exit status 1.

    Uses ``driver.quit()`` rather than ``driver.close()``: ``close()`` only
    closes the current window, while ``quit()`` ends the whole WebDriver
    session so no browser/driver process is left behind.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("Aborting!")
    try:
        driver.quit()
    finally:
        # Exit with a failure status even if shutting the browser down fails.
        sys.exit(1)
# Wait for download function
def download_wait(path_to_downloads, timeout=30, poll_interval=1):
    """Block until Chrome has no in-progress downloads in a directory.

    Chrome keeps a ``.crdownload`` file for every unfinished download. The
    directory is polled every ``poll_interval`` seconds until no such file
    remains or ``timeout`` seconds have been waited. The function always
    sleeps once before the first check, giving the browser time to create
    the temporary file.

    Args:
        path_to_downloads: Directory Chrome is downloading into.
        timeout: Maximum time to wait, in seconds (default 30).
        poll_interval: Pause between directory checks, in seconds (default 1).

    Returns:
        The time waited, in seconds. With the defaults this is the integer
        number of one-second polls performed. A value equal to ``timeout``
        may mean the wait gave up while a download was still running.

    Raises:
        OSError: If ``path_to_downloads`` cannot be listed.
    """
    waited = 0
    while waited < timeout:
        time.sleep(poll_interval)
        waited += poll_interval
        still_downloading = any(
            name.endswith('.crdownload') for name in os.listdir(path_to_downloads)
        )
        if not still_downloading:
            break
    return waited
def _center_and_click(xpath, realign=True):
    """Scroll the element matched by ``xpath`` into view and click it.

    The element is first vertically centred in the viewport, then, after a
    short pause for the page layout to settle, looked up again and clicked
    through the shared ``action`` chain.

    Args:
        xpath: XPath expression locating the element to click.
        realign: When true, re-run ``scrollIntoView`` (backing off 100px)
            right before clicking.

    Raises:
        TimeoutException: If the element does not become present/visible
            within the wait periods (raised by ``WebDriverWait.until``).
    """
    locator = (By.XPATH, xpath)
    target = WebDriverWait(driver, 13).until(
        EC.presence_of_element_located(locator)
    )
    # Scroll so the element's vertical centre matches the viewport's centre.
    desired_y = (target.size['height'] / 2) + target.location['y']
    current_y = (driver.execute_script('return window.innerHeight') / 2) + driver.execute_script('return window.pageYOffset')
    driver.execute_script("window.scrollBy(0, arguments[0]);", desired_y - current_y)
    time.sleep(2)  # Fixes content shift and ElementClickInterceptedException by waiting, checking the elem, and scrolling again
    target = WebDriverWait(driver, delay).until(
        EC.visibility_of_element_located(locator)
    )
    if realign:
        driver.execute_script("arguments[0].scrollIntoView(true); window.scrollBy(0, -100);", target)
    action.move_to_element(target).move_by_offset(0, 0).click().perform()


def download_invoices(Id, Network):
    """Switch into an AWS account and download last month's invoices.

    Opens the console switch-role page for the account, submits it, loads
    the Bills page for the previous calendar month, then expands every
    matched " Charges" section in turn, clicks each "Download Invoice"
    link inside it, and collapses the section again.

    Args:
        Id: AWS account id, as a string (it is concatenated into the URL).
        Network: Name used in log output and as the role display name.

    Raises:
        TimeoutException: If an expected page element does not show up in
            time (raised by ``WebDriverWait.until``).
    """
    print("Switching to the " + Network + "/" + Id + " org account...")
    # remove_existing_conflicts(Network)
    # NOTE(review): "YOUR_ROLE_NAME" looks like a placeholder that must be
    # replaced with the real role name before use.
    driver.get("https://signin.aws.amazon.com/switchrole?account=" + Id + "&roleName=YOUR_ROLE_NAME&displayName=" + Network + "%20Org%20Master")
    time.sleep(1)
    submit_button = WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.XPATH, '//*[@type="submit"]'))
    )
    submit_button.click()
    time.sleep(3)
    print("Downloading invoices...")
    # Notes
    # Can provide YYYY and MM in the URL to get a specific YYYY/MM billing period
    # https://us-east-1.console.aws.amazon.com/billing/home?region=us-east-1#/bills?year=2023&month=1
    # The billing period requested is the previous calendar month.
    last_month = datetime.now() - relativedelta(months=1)
    year = last_month.strftime("%Y")
    month = last_month.strftime("%m")
    driver.get("https://us-east-1.console.aws.amazon.com/billing/home?region=us-east-1#/bills?year=" + year + "&month=" + month)
    WebDriverWait(driver, 13).until(
        EC.presence_of_element_located((By.XPATH, '//*[@data-testid="main-spinner"]'))
    )
    time.sleep(2)
    # Every " Charges" text node except the trailing ones excluded by the
    # position() predicate.
    charges_xpath = '(//*[text()[contains(., " Charges")]])[position() < last() - 1]'
    sections = WebDriverWait(driver, 13).until(
        EC.presence_of_all_elements_located((By.XPATH, charges_xpath))
    )
    # Count the number of items in the list
    elem_count = len(sections)
    print("Found " + str(elem_count) + " items in the list...")
    # Loop through the list and expand each item (XPath positions are 1-based)
    for i in range(1, elem_count + 1):
        print("Expanding item " + str(i) + " of " + str(elem_count) + "...")
        section_xpath = charges_xpath + '[' + str(i) + ']'
        _center_and_click(section_xpath)
        # Count the number of invoices with that item
        links_xpath = section_xpath + '/following-sibling::div//*[@title="Download Invoice"]'
        links = WebDriverWait(driver, 13).until(
            EC.presence_of_all_elements_located((By.XPATH, links_xpath))
        )
        invoice_count = len(links)
        # Loop through the list and download each invoice
        for j in range(1, invoice_count + 1):
            print("Downloading invoice " + str(j) + " of " + str(invoice_count) + "...")
            _center_and_click('(' + links_xpath + ')[' + str(j) + ']')
            download_wait(download_directory)
            time.sleep(3)
        # Find the parent again and collapse it (no scrollIntoView re-alignment here)
        _center_and_click(section_xpath, realign=False)