I want to convert an mbox file to MSG format. I have made an attempt, but the output is not in the proper format. I am able to read the mbox file, but I do not know how to create an MSG file from it. I have already converted the mbox file to EML files, and I want to create MSG files in the same way, but I cannot work out how to do that.
Below is the code for converting mbox to EML.
import os
import mailbox
from email import generator
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Number of .eml files written so far; also supplies the next output file name.
count = 0


def emlGenerator(body, thisemail, out_dir='xxxxx/test2'):
    """Write one message as ``<out_dir>/<count>.eml`` and increment ``count``.

    A new ``multipart/alternative`` message is built that carries the
    Subject, From, To, Cc, Bcc and Date headers of *thisemail* plus a single
    text part holding *body*.

    Args:
        body: Body text. ``None`` is written as an empty body and ``bytes``
            are decoded as UTF-8 (undecodable bytes are replaced), because
            ``MIMEText`` only accepts ``str``.
        thisemail: Source message; headers are read with ``thisemail[name]``.
        out_dir: Directory that receives the file. It is created when it
            does not exist. Defaults to the originally hard-coded path.

    Side effects:
        Writes a file and updates the module-level ``count``. The counter is
        only advanced after the file was written successfully, so it always
        equals the number of files produced.
    """
    global count

    if body is None:
        body = ''
    elif isinstance(body, bytes):
        body = body.decode('utf-8', errors='replace')

    msg = MIMEMultipart('alternative')
    header_map = (
        ('Subject', 'subject'),
        ('From', 'From'),
        ('To', 'To'),
        ('Cc', 'Cc'),
        ('Bcc', 'Bcc'),
        ('Date', 'Date'),
    )
    for out_name, src_name in header_map:
        value = thisemail[src_name]
        # A header that is absent in the source comes back as None; copying
        # it would emit an empty "Cc: " style line into the output file.
        if value is not None:
            msg[out_name] = value

    msg.attach(MIMEText(body))

    os.makedirs(out_dir, exist_ok=True)
    outfile_name = os.path.join(out_dir, str(count) + '.eml')
    # Explicit encoding so the result does not depend on the platform locale.
    with open(outfile_name, 'w', encoding='utf-8') as outfile:
        generator.Generator(outfile).flatten(msg)
    count += 1
def getcharsets(msg):
    """Return the set of charsets declared in *msg*.

    Args:
        msg: A message object providing ``get_charsets()``.

    Returns:
        A ``set`` of the charset names reported by ``msg.get_charsets()``,
        with ``None`` entries left out. Empty when no charset is declared.
    """
    return {charset for charset in msg.get_charsets() if charset is not None}
def handleerror(errmsg, emailmsg, cs):
    """Print a report about a decoding failure to standard output.

    Args:
        errmsg: The error (or error text) to show.
        emailmsg: The message that was being decoded; its charsets, subject
            and sender are included in the report.
        cs: The charset that was in use when the error occurred.
    """
    # The report starts with an empty line, followed by the error itself.
    print()
    print(errmsg)
    print("This error occurred while decoding with ", cs, " charset.")

    found_charsets = getcharsets(emailmsg)
    print("These charsets were found in the one email.", found_charsets)

    subject = emailmsg['subject']
    print("This is the subject:", subject)

    sender = emailmsg['From']
    print("This is the sender:", sender)
def getbodyfromemail(msg):
    """Return the plain-text body of *msg* as ``str``, or ``None``.

    The first ``text/plain`` part that is not marked as an attachment is
    taken as the body, and its payload is decoded with the charset declared
    on that same part.

    Args:
        msg: An ``email.message.Message`` (e.g. an item yielded while
            iterating a ``mailbox.mbox``).

    Returns:
        The decoded body text, or ``None`` when the message contains no
        usable ``text/plain`` part.
    """
    # walk() yields the message itself and then every nested sub-part, so a
    # single pass handles both multipart and non-multipart messages.
    for part in msg.walk():
        if part.get_content_type() != 'text/plain':
            continue
        # A text file sent as an attachment is not the message body.
        if part.get_content_disposition() == 'attachment':
            continue
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown charset name or bytes that do not match the declared
            # charset: still return text rather than raw bytes.
            return payload.decode('utf-8', errors='replace')
    return None
if __name__ == "__main__":
    # Convert every message of the mbox file into its own .eml file.
    # create=False: the mailbox is only read here, and the default
    # (create=True) would silently create an empty file for a wrong path.
    source_box = mailbox.mbox('xxxxxx/topics.mbox', create=False)
    try:
        for thisemail in source_box:
            print(thisemail['Message-id'])
            body = getbodyfromemail(thisemail)
            emlGenerator(body, thisemail)
    finally:
        # Release the underlying file even if a message fails to convert.
        source_box.close()
    print("=========== DONE ============")
    print("Total ", count, " File")