R David Murray | 02384bf | 2014-02-07 10:55:17 -0500 | [diff] [blame] | 1 | import os |
| 2 | import sys |
| 3 | import tempfile |
| 4 | import mimetypes |
| 5 | import webbrowser |
| 6 | |
| 7 | # Import the email modules we'll need |
| 8 | from email import policy |
| 9 | from email.parser import BytesParser |
| 10 | |
| 11 | # An imaginary module that would make this work and be safe. |
| 12 | from imaginary import magic_html_parser |
| 13 | |
# In a real program you'd get the filename from the arguments.  The ``with``
# block closes the file as soon as the message has been parsed.
with open('outgoing.msg', 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# Now the header items can be accessed as a dictionary, and any non-ASCII will
# be converted to unicode:
print('To:', msg['to'])
print('From:', msg['from'])
print('Subject:', msg['subject'])

# If we want to print a preview of the message content, we can extract whatever
# the least formatted payload is and print the first three lines.  Of course,
# if the message has no plain text part printing the first three lines of html
# is probably useless, but this is just a conceptual example.
simplest = msg.get_body(preferencelist=('plain', 'html'))
print()
# get_body() returns None when the message has no plain or html body part, in
# which case there is nothing to preview.
if simplest is not None:
    print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))

ans = input("View full message?")
# startswith() also copes with an empty answer (the user just pressing Enter),
# where indexing the first character would raise IndexError.
if ans.lower().startswith('n'):
    sys.exit()

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
if richest is None:
    print("Don't know how to display {}".format(msg.get_content_type()))
    sys.exit()

# Maps the Content-ID of each related part (without the surrounding <>) to the
# name of the temporary file its content was saved in.
partfiles = {}

# The get_content_*() accessors are used rather than the raw Content-Type
# header so that a part without that header does not raise AttributeError.
if richest.get_content_maintype() == 'text':
    subtype = richest.get_content_subtype()
    if subtype == 'plain':
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()
    elif subtype == 'html':
        body = richest
    else:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
elif richest.get_content_type() == 'multipart/related':
    # preferencelist must be a real tuple: ('html') is just the string 'html'.
    body = richest.get_body(preferencelist=('html',))
    if body is None:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
    for part in richest.iter_attachments():
        cid = part['content-id']
        if cid is None:
            # Without a Content-ID the html cannot refer to this part via a
            # cid: URL, so there is no point in writing it out.
            continue
        fn = part.get_filename()
        if fn:
            extension = os.path.splitext(fn)[1]
        else:
            extension = mimetypes.guess_extension(part.get_content_type())
        with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as f:
            f.write(part.get_content())
            # Strip the <> to go from email form of cid to html form.
            partfiles[cid[1:-1]] = f.name
else:
    print("Don't know how to display {}".format(richest.get_content_type()))
    sys.exit()

html_name = None
try:
    # The .html suffix lets the browser (or the platform's file association)
    # recognise the temporary file as an html document.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html',
                                     delete=False) as f:
        html_name = f.name
        # The magic_html_parser has to rewrite the href="cid:...." attributes
        # to point to the filenames in partfiles.  It also has to do a
        # safety-sanitize of the html.  It could be written using html.parser.
        f.write(magic_html_parser(body.get_content(), partfiles))
    webbrowser.open(html_name)
finally:
    # Remove the temporary files even if rewriting or launching the browser
    # failed.
    # NOTE(review): webbrowser.open() may return before the browser has read
    # the files, so removing them immediately looks racy -- confirm whether a
    # real program should wait here first.
    if html_name is not None:
        os.remove(html_name)
    for fn in partfiles.values():
        os.remove(fn)

# Of course, there are lots of email messages that could break this
# simple-minded program, but it will handle the most common ones.