I am trying to write a script that scans through all of my emails from the past year, but I can only get it to download the first 3. The tutorial I am following doesn't explain where I can change the number of emails it downloads. Here's the code so far:
"""Download attachments from Gmail messages whose subject contains "Invoice".

The script logs in over IMAP, searches the inbox, saves every attachment of
every matching message to SAVE_DIR, and prints each message's sender,
subject and (for non-multipart messages) decoded body.
"""
import email
import getpass
import imaplib
import os

IMAP_HOST = "imap.gmail.com"
IMAP_PORT = 993
MAILBOX = "INBOX"
SAVE_DIR = '/Users/benbuechler/Desktop/Keepr Receipt Storage'
# IMAP SEARCH returns *every* matching message; there is no built-in limit.
# To restrict the search by date, add a SINCE key, e.g.
# '(SINCE "01-Jan-2024" SUBJECT "Invoice")'.
SEARCH_CRITERIA = '(SUBJECT "Invoice")'


def save_attachments(message, directory):
    """Write the attachments of *message* into *directory*.

    Args:
        message: a parsed ``email.message.Message``.
        directory: path of an existing directory to write into.

    Returns:
        The list of file names that were actually written, in the order
        they appear in the message. Parts without a Content-Disposition
        header or without a file name are ignored, and files that already
        exist in *directory* are left untouched.
    """
    saved = []
    for part in message.walk():
        # Multipart parts are only containers; walk() yields their children.
        if part.get_content_maintype() == 'multipart':
            continue
        # Parts without a Content-Disposition header are skipped.
        if part.get('Content-Disposition') is None:
            continue
        file_name = part.get_filename()
        if not file_name:
            continue
        # The name comes from the email, i.e. untrusted input: keep only the
        # final path component so it cannot escape *directory*.
        safe_name = os.path.basename(file_name.replace('\\', '/'))
        if not safe_name:
            continue
        file_path = os.path.join(directory, safe_name)
        if os.path.isfile(file_path):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # The context manager closes the file even if the write fails.
        with open(file_path, 'wb') as fp:
            fp.write(payload)
        saved.append(safe_name)
    return saved


def main():
    """Prompt for credentials, then download and report matching emails."""
    # user credentials (getpass keeps the password off the screen)
    email_user = input('Email: ')
    email_pass = getpass.getpass('Password: ')

    os.makedirs(SAVE_DIR, exist_ok=True)

    # connect to imap; the context manager logs out on exit
    with imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT) as mail:
        mail.login(email_user, email_pass)

        # select() opens ONE mailbox; its second argument is the read-only
        # flag, not another folder name. Read-only is what the previous
        # truthy second argument already requested.
        status, _ = mail.select(MAILBOX, readonly=True)
        if status != 'OK':
            raise SystemExit('Could not open mailbox "{}"'.format(MAILBOX))

        # filter emails by subject
        status, found = mail.search(None, SEARCH_CRITERIA)
        if status != 'OK':
            raise SystemExit('Search failed: {}'.format(found))

        for num in found[0].split():
            status, fetched = mail.fetch(num, '(RFC822)')
            if status != 'OK' or not fetched or not isinstance(fetched[0], tuple):
                continue

            # Parse the raw bytes directly: decoding the whole message as
            # UTF-8 first fails on messages in any other encoding.
            message = email.message_from_bytes(fetched[0][1])
            # Read headers through the parsed message instead of splitting
            # its text, which breaks when the header order differs.
            subject = message.get('Subject')

            # downloading attachments
            for file_name in save_attachments(message, SAVE_DIR):
                print('Downloaded "{file}" from email titled "{subject}"'.format(file=file_name, subject=subject))

            # format() tolerates a missing header, unlike string concatenation
            print('From : {}\n'.format(message.get('From')))
            print('Subject : {}\n'.format(subject))
            # A multipart message has no single decodable body.
            if not message.is_multipart():
                print(message.get_payload(decode=True))


if __name__ == "__main__":
    main()