blob: 5626ab91eb57da6b0fbe54885dd71e6b17b12ec7 [file] [log] [blame]
Georg Brandl8cdc9bc2010-01-01 13:07:05 +00001# Copyright (C) 2001-2010 Python Software Foundation
Barry Warsawbb113862004-10-03 03:16:19 +00002# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00003
Barry Warsawbb113862004-10-03 03:16:19 +00004"""Classes to generate plain text from a message object tree."""
Barry Warsawba925802001-09-23 03:17:28 +00005
# Public API of this module: the names exported by a star-import.
__all__ = ['Generator', 'DecodedGenerator']
7
Barry Warsawba925802001-09-23 03:17:28 +00008import re
Barry Warsawdb6888b2003-05-29 19:39:33 +00009import sys
Barry Warsaw5d384ef2003-03-06 05:22:02 +000010import time
Barry Warsawba925802001-09-23 03:17:28 +000011import random
Barry Warsawbb113862004-10-03 03:16:19 +000012import warnings
Barry Warsawba925802001-09-23 03:17:28 +000013
Barry Warsaw40ef0062006-03-18 15:41:53 +000014from cStringIO import StringIO
15from email.header import Header
Barry Warsaw062749a2002-06-28 23:41:42 +000016
# Glue used to build a handler-method name from a MIME maintype and subtype.
UNDERSCORE = '_'
# Line separator used when joining already-flattened chunks of text.
NL = '\n'

# Matches "From " at the start of any line (MULTILINE).  Used to rewrite such
# lines to ">From " in bodies, preambles and epilogues when From_ mangling is
# enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
21
Barry Warsaw6c2bc462002-10-14 15:09:30 +000022def _is8bitstring(s):
Barry Warsaw36112f22004-05-09 03:35:17 +000023 if isinstance(s, str):
Barry Warsaw6c2bc462002-10-14 15:09:30 +000024 try:
25 unicode(s, 'us-ascii')
26 except UnicodeError:
27 return True
28 return False
29
Barry Warsawba925802001-09-23 03:17:28 +000030
Barry Warsawe968ead2001-10-04 17:05:11 +000031
Barry Warsawba925802001-09-23 03:17:28 +000032class Generator:
33 """Generates output from a Message object tree.
34
35 This basic generator writes the message to the given file object as plain
36 text.
37 """
38 #
39 # Public interface
40 #
41
Barry Warsaw56835dd2002-09-28 18:04:55 +000042 def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
Barry Warsawba925802001-09-23 03:17:28 +000043 """Create the generator for message flattening.
44
45 outfp is the output file-like object for writing the message to. It
46 must have a write() method.
47
Barry Warsaw56835dd2002-09-28 18:04:55 +000048 Optional mangle_from_ is a flag that, when True (the default), escapes
49 From_ lines in the body of the message by putting a `>' in front of
50 them.
Barry Warsawba925802001-09-23 03:17:28 +000051
52 Optional maxheaderlen specifies the longest length for a non-continued
53 header. When a header line is longer (in characters, with tabs
Barry Warsawb03136a2003-11-19 02:23:01 +000054 expanded to 8 spaces) than maxheaderlen, the header will split as
55 defined in the Header class. Set maxheaderlen to zero to disable
56 header wrapping. The default is 78, as recommended (but not required)
57 by RFC 2822.
Barry Warsawba925802001-09-23 03:17:28 +000058 """
59 self._fp = outfp
60 self._mangle_from_ = mangle_from_
Barry Warsaw36112f22004-05-09 03:35:17 +000061 self._maxheaderlen = maxheaderlen
Barry Warsawba925802001-09-23 03:17:28 +000062
63 def write(self, s):
64 # Just delegate to the file object
65 self._fp.write(s)
66
Barry Warsaw56835dd2002-09-28 18:04:55 +000067 def flatten(self, msg, unixfrom=False):
Barry Warsawba925802001-09-23 03:17:28 +000068 """Print the message object tree rooted at msg to the output file
69 specified when the Generator instance was created.
70
71 unixfrom is a flag that forces the printing of a Unix From_ delimiter
72 before the first object in the message tree. If the original message
73 has no From_ delimiter, a `standard' one is crafted. By default, this
Barry Warsaw56835dd2002-09-28 18:04:55 +000074 is False to inhibit the printing of any From_ delimiter.
Barry Warsawba925802001-09-23 03:17:28 +000075
76 Note that for subobjects, no From_ line is printed.
77 """
78 if unixfrom:
79 ufrom = msg.get_unixfrom()
80 if not ufrom:
81 ufrom = 'From nobody ' + time.ctime(time.time())
82 print >> self._fp, ufrom
83 self._write(msg)
84
Barry Warsaw93c40f02002-07-09 02:43:47 +000085 def clone(self, fp):
86 """Clone this generator with the exact same options."""
Barry Warsaw36112f22004-05-09 03:35:17 +000087 return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
Barry Warsaw93c40f02002-07-09 02:43:47 +000088
Barry Warsawba925802001-09-23 03:17:28 +000089 #
90 # Protected interface - undocumented ;/
91 #
92
93 def _write(self, msg):
94 # We can't write the headers yet because of the following scenario:
95 # say a multipart message includes the boundary string somewhere in
96 # its body. We'd have to calculate the new boundary /before/ we write
97 # the headers so that we can write the correct Content-Type:
98 # parameter.
99 #
100 # The way we do this, so as to make the _handle_*() methods simpler,
101 # is to cache any subpart writes into a StringIO. The we write the
102 # headers and the StringIO contents. That way, subpart handlers can
103 # Do The Right Thing, and can still modify the Content-Type: header if
104 # necessary.
105 oldfp = self._fp
106 try:
107 self._fp = sfp = StringIO()
108 self._dispatch(msg)
109 finally:
110 self._fp = oldfp
111 # Write the headers. First we see if the message object wants to
112 # handle that itself. If not, we'll do it generically.
113 meth = getattr(msg, '_write_headers', None)
114 if meth is None:
115 self._write_headers(msg)
116 else:
117 meth(self)
118 self._fp.write(sfp.getvalue())
119
120 def _dispatch(self, msg):
121 # Get the Content-Type: for the message, then try to dispatch to
Barry Warsawf488b2c2002-07-11 18:48:40 +0000122 # self._handle_<maintype>_<subtype>(). If there's no handler for the
123 # full MIME type, then dispatch to self._handle_<maintype>(). If
124 # that's missing too, then dispatch to self._writeBody().
Barry Warsawdfea3b32002-08-20 14:47:30 +0000125 main = msg.get_content_maintype()
126 sub = msg.get_content_subtype()
Barry Warsaw93c40f02002-07-09 02:43:47 +0000127 specific = UNDERSCORE.join((main, sub)).replace('-', '_')
128 meth = getattr(self, '_handle_' + specific, None)
129 if meth is None:
130 generic = main.replace('-', '_')
131 meth = getattr(self, '_handle_' + generic, None)
Barry Warsawba925802001-09-23 03:17:28 +0000132 if meth is None:
Barry Warsaw93c40f02002-07-09 02:43:47 +0000133 meth = self._writeBody
134 meth(msg)
Barry Warsawba925802001-09-23 03:17:28 +0000135
136 #
137 # Default handlers
138 #
139
140 def _write_headers(self, msg):
141 for h, v in msg.items():
Barry Warsawce6bf592003-03-07 15:43:17 +0000142 print >> self._fp, '%s:' % h,
Barry Warsaw36112f22004-05-09 03:35:17 +0000143 if self._maxheaderlen == 0:
Barry Warsawce6bf592003-03-07 15:43:17 +0000144 # Explicit no-wrapping
145 print >> self._fp, v
146 elif isinstance(v, Header):
147 # Header instances know what to do
148 print >> self._fp, v.encode()
149 elif _is8bitstring(v):
150 # If we have raw 8bit data in a byte string, we have no idea
151 # what the encoding is. There is no safe way to split this
152 # string. If it's ascii-subset, then we could do a normal
153 # ascii split, but if it's multibyte then we could break the
154 # string. There's no way to know so the least harm seems to
155 # be to not split the string and risk it being too long.
156 print >> self._fp, v
157 else:
Barry Warsawdbf95a32009-03-30 22:42:17 +0000158 # Header's got lots of smarts, so use it. Note that this is
159 # fundamentally broken though because we lose idempotency when
160 # the header string is continued with tabs. It will now be
161 # continued with spaces. This was reversedly broken before we
162 # fixed bug 1974. Either way, we lose.
Barry Warsawce6bf592003-03-07 15:43:17 +0000163 print >> self._fp, Header(
Barry Warsawdbf95a32009-03-30 22:42:17 +0000164 v, maxlinelen=self._maxheaderlen, header_name=h).encode()
Barry Warsawba925802001-09-23 03:17:28 +0000165 # A blank line always separates headers from body
166 print >> self._fp
167
Barry Warsawba925802001-09-23 03:17:28 +0000168 #
169 # Handlers for writing types and subtypes
170 #
171
172 def _handle_text(self, msg):
173 payload = msg.get_payload()
Barry Warsawb384e012001-09-26 05:32:41 +0000174 if payload is None:
175 return
Barry Warsaw36112f22004-05-09 03:35:17 +0000176 if not isinstance(payload, basestring):
Barry Warsawbb113862004-10-03 03:16:19 +0000177 raise TypeError('string payload expected: %s' % type(payload))
Barry Warsawba925802001-09-23 03:17:28 +0000178 if self._mangle_from_:
179 payload = fcre.sub('>From ', payload)
180 self._fp.write(payload)
181
182 # Default body handler
183 _writeBody = _handle_text
184
Barry Warsaw93c40f02002-07-09 02:43:47 +0000185 def _handle_multipart(self, msg):
Barry Warsawba925802001-09-23 03:17:28 +0000186 # The trick here is to write out each part separately, merge them all
187 # together, and then make sure that the boundary we've chosen isn't
188 # present in the payload.
189 msgtexts = []
Barry Warsaw409a4c02002-04-10 21:01:31 +0000190 subparts = msg.get_payload()
191 if subparts is None:
Barry Warsaw36112f22004-05-09 03:35:17 +0000192 subparts = []
193 elif isinstance(subparts, basestring):
Barry Warsawb1c1de32002-09-10 16:13:45 +0000194 # e.g. a non-strict parse of a message with no starting boundary.
195 self._fp.write(subparts)
196 return
Barry Warsaw36112f22004-05-09 03:35:17 +0000197 elif not isinstance(subparts, list):
Barry Warsaw409a4c02002-04-10 21:01:31 +0000198 # Scalar payload
199 subparts = [subparts]
200 for part in subparts:
Barry Warsawba925802001-09-23 03:17:28 +0000201 s = StringIO()
Barry Warsaw93c40f02002-07-09 02:43:47 +0000202 g = self.clone(s)
Barry Warsaw56835dd2002-09-28 18:04:55 +0000203 g.flatten(part, unixfrom=False)
Barry Warsawba925802001-09-23 03:17:28 +0000204 msgtexts.append(s.getvalue())
Barry Warsawba925802001-09-23 03:17:28 +0000205 # BAW: What about boundaries that are wrapped in double-quotes?
R. David Murray910c52f2010-12-12 20:32:19 +0000206 boundary = msg.get_boundary()
207 if not boundary:
208 # Create a boundary that doesn't appear in any of the
209 # message texts.
210 alltext = NL.join(msgtexts)
R. David Murraya993b2d2010-12-21 18:12:50 +0000211 boundary = _make_boundary(alltext)
212 msg.set_boundary(boundary)
Barry Warsaw36112f22004-05-09 03:35:17 +0000213 # If there's a preamble, write it out, with a trailing CRLF
Barry Warsawba925802001-09-23 03:17:28 +0000214 if msg.preamble is not None:
R David Murray0f111c12012-07-22 21:55:12 -0400215 if self._mangle_from_:
216 preamble = fcre.sub('>From ', msg.preamble)
217 else:
218 preamble = msg.preamble
219 print >> self._fp, preamble
Barry Warsaw36112f22004-05-09 03:35:17 +0000220 # dash-boundary transport-padding CRLF
Barry Warsawba925802001-09-23 03:17:28 +0000221 print >> self._fp, '--' + boundary
Barry Warsaw36112f22004-05-09 03:35:17 +0000222 # body-part
223 if msgtexts:
224 self._fp.write(msgtexts.pop(0))
225 # *encapsulation
226 # --> delimiter transport-padding
227 # --> CRLF body-part
228 for body_part in msgtexts:
229 # delimiter transport-padding CRLF
230 print >> self._fp, '\n--' + boundary
231 # body-part
232 self._fp.write(body_part)
233 # close-delimiter transport-padding
234 self._fp.write('\n--' + boundary + '--')
Barry Warsawba925802001-09-23 03:17:28 +0000235 if msg.epilogue is not None:
Barry Warsaw36112f22004-05-09 03:35:17 +0000236 print >> self._fp
R David Murray0f111c12012-07-22 21:55:12 -0400237 if self._mangle_from_:
238 epilogue = fcre.sub('>From ', msg.epilogue)
239 else:
240 epilogue = msg.epilogue
241 self._fp.write(epilogue)
Barry Warsawba925802001-09-23 03:17:28 +0000242
R. David Murrayed44dfa2010-01-16 05:15:17 +0000243 def _handle_multipart_signed(self, msg):
244 # The contents of signed parts has to stay unmodified in order to keep
245 # the signature intact per RFC1847 2.1, so we disable header wrapping.
246 # RDM: This isn't enough to completely preserve the part, but it helps.
247 old_maxheaderlen = self._maxheaderlen
248 try:
249 self._maxheaderlen = 0
250 self._handle_multipart(msg)
251 finally:
252 self._maxheaderlen = old_maxheaderlen
253
Barry Warsawb384e012001-09-26 05:32:41 +0000254 def _handle_message_delivery_status(self, msg):
255 # We can't just write the headers directly to self's file object
256 # because this will leave an extra newline between the last header
257 # block and the boundary. Sigh.
258 blocks = []
259 for part in msg.get_payload():
260 s = StringIO()
Barry Warsaw93c40f02002-07-09 02:43:47 +0000261 g = self.clone(s)
Barry Warsaw56835dd2002-09-28 18:04:55 +0000262 g.flatten(part, unixfrom=False)
Barry Warsawb384e012001-09-26 05:32:41 +0000263 text = s.getvalue()
264 lines = text.split('\n')
265 # Strip off the unnecessary trailing empty line
266 if lines and lines[-1] == '':
267 blocks.append(NL.join(lines[:-1]))
268 else:
269 blocks.append(text)
270 # Now join all the blocks with an empty line. This has the lovely
271 # effect of separating each block with an empty line, but not adding
272 # an extra one after the last one.
273 self._fp.write(NL.join(blocks))
274
275 def _handle_message(self, msg):
Barry Warsawba925802001-09-23 03:17:28 +0000276 s = StringIO()
Barry Warsaw93c40f02002-07-09 02:43:47 +0000277 g = self.clone(s)
Barry Warsaw7dc865a2002-06-02 19:02:37 +0000278 # The payload of a message/rfc822 part should be a multipart sequence
279 # of length 1. The zeroth element of the list should be the Message
Barry Warsaw93c40f02002-07-09 02:43:47 +0000280 # object for the subpart. Extract that object, stringify it, and
281 # write it out.
R. David Murray51f12042010-02-21 04:23:00 +0000282 # Except, it turns out, when it's a string instead, which happens when
283 # and only when HeaderParser is used on a message of mime type
284 # message/rfc822. Such messages are generated by, for example,
285 # Groupwise when forwarding unadorned messages. (Issue 7970.) So
286 # in that case we just emit the string body.
287 payload = msg.get_payload()
288 if isinstance(payload, list):
289 g.flatten(msg.get_payload(0), unixfrom=False)
290 payload = s.getvalue()
291 self._fp.write(payload)
Barry Warsawba925802001-09-23 03:17:28 +0000292
293
Barry Warsawe968ead2001-10-04 17:05:11 +0000294
# Placeholder text emitted for a non-text part when no fmt is supplied.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Produce a text rendition of a message.

    Works like the Generator base class, but every non-text part is replaced
    by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Same as Generator.__init__(), plus one extra optional argument.

        All subparts of a message are walked.  For a subpart of main type
        `text', the decoded payload of the subpart is printed.

        For any other subpart, fmt is a format string that is printed in
        place of the message payload.  The following keywords are available
        when fmt is expanded (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        fmt defaults to None, which selects

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        # The generator itself is the print target: its write() method
        # forwards the text to the output file.
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output themselves
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            fields = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % fields
350
351
Barry Warsawe968ead2001-10-04 17:05:11 +0000352
Barry Warsawba925802001-09-23 03:17:28 +0000353# Helper
Barry Warsawdb6888b2003-05-29 19:39:33 +0000354_width = len(repr(sys.maxint-1))
355_fmt = '%%0%dd' % _width
356
Barry Warsaw409a4c02002-04-10 21:01:31 +0000357def _make_boundary(text=None):
Barry Warsawba925802001-09-23 03:17:28 +0000358 # Craft a random boundary. If text is given, ensure that the chosen
359 # boundary doesn't appear in the text.
Barry Warsaw663219a2003-06-24 20:19:34 +0000360 token = random.randrange(sys.maxint)
Barry Warsawdb6888b2003-05-29 19:39:33 +0000361 boundary = ('=' * 15) + (_fmt % token) + '=='
Barry Warsawba925802001-09-23 03:17:28 +0000362 if text is None:
363 return boundary
364 b = boundary
365 counter = 0
Barry Warsaw56835dd2002-09-28 18:04:55 +0000366 while True:
Barry Warsawba925802001-09-23 03:17:28 +0000367 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
368 if not cre.search(text):
369 break
370 b = boundary + '.' + str(counter)
371 counter += 1
372 return b