The point of this script is to find hyperlinks in emails and automatically open them. I'm currently stuck on the search part.
The script can't seem to pick up the link from the body of the email. The hyperlink should look like
https://something.com/verify/c4b7668ad547922226426896f
is something wrong with my regex?
def process_mailbox(M):
rv, data = M.search(None, specific_email_addy)
if rv != 'OK':
print "No messages found!"
return
for num in data[0].split():
rv, data = M.fetch(num, '(RFC822)')
if rv != 'OK':
print "ERROR getting message", num
return
msg = email.message_from_string(data[0][1])
raw_email = data[0][1] # here's the body, which is raw headers and html and body of the whole email including headers and alternate payloads
msg = email.message_from_string(raw_email)
for part in msg.walk():
# each part is a either non-multipart, or another multipart message
# that contains further parts... Message is organized like a tree
if part.get_content_type() == 'text/html':
plain_text = part.get_payload()
link_pattern = re.compile('<a[^>]+href=\'(.*?)\'[^>]*>(.*)?</a>')
search = link_pattern.search(plain_text)
if search is not None:
print("Link found! -> " + search)
break
else:
print("No links were found.")