I'm trying to figure out how to get only the text portion of an email message. Using the following code I'm able to get the body, but it is always followed by the html of the email, which I don't need. How can I tell my script to ignore the html?
import imaplib
import email
def extract_body(payload):
if isinstance(payload,str):
return payload
else:
return '\n'.join([extract_body(part.get_payload()) for part in payload])
conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
conn.login("username", "password")
conn.select()
typ, data = conn.search(None, 'UNSEEN')
try:
for num in data[0].split():
typ, msg_data = conn.fetch(num, '(RFC822)')
for response_part in msg_data:
if isinstance(response_part, tuple):
msg = email.message_from_string(response_part[1])
subject=msg['subject']
print(subject)
payload=msg.get_payload()
body=extract_body(payload)
print(body)
typ, response = conn.store(num, '+FLAGS', r'(\Seen)')
finally:
try:
conn.close()
except:
pass
conn.logout()