blob: cc30affad9449f8c5c81e0ad9293375de694a7c6 [file] [log] [blame]
Benjamin Petersonffeda292010-01-09 18:48:46 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Classes to generate plain text from a message object tree."""
6
7__all__ = ['Generator', 'DecodedGenerator']
8
9import re
10import sys
11import time
12import random
13import warnings
14
15from io import StringIO
16from email.header import Header
17
# Single-character separators shared by the generator classes in this module.
UNDERSCORE = '_'
NL = '\n'

# Matches the text "From " at the start of any line of a multi-line string;
# used to locate body lines that look like Unix mbox From_ delimiters.
fcre = re.compile(r'^From ', flags=re.MULTILINE)
22
23
24
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print(ufrom, file=self._fp)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers, then body) to the current output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not valid in method names, so map them to underscores.
        specific = ('%s_%s' % (main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    def _flatten_to_string(self, part):
        """Return the text produced by flattening part with a clone of self.

        The clone is created at call time, so it picks up the current option
        values (e.g. a temporarily changed maxheaderlen).  No From_ line is
        written for the part.
        """
        buf = StringIO()
        self.clone(buf).flatten(part, unixfrom=False)
        return buf.getvalue()

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            print('%s:' % h, end=' ', file=self._fp)
            if isinstance(v, Header):
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                print(header.encode(), file=self._fp)
        # A blank line always separates headers from body
        print(file=self._fp)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, mangling From_ lines if enabled.

        A payload of None writes nothing.  Raises TypeError when the payload
        is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the subparts of msg separated by its boundary delimiter.

        If msg has no boundary, one is generated that does not occur in the
        flattened subparts and is stored on msg via set_boundary().
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        msgtexts = [self._flatten_to_string(part) for part in subparts]
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print(msg.preamble, file=self._fp)
        # dash-boundary transport-padding CRLF
        print('--' + boundary, file=self._fp)
        # body-part *encapsulation: every part after the first is preceded by
        # "CRLF delimiter transport-padding CRLF".
        if msgtexts:
            self._fp.write(('\n--' + boundary + '\n').join(msgtexts))
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print(file=self._fp)
            self._fp.write(msg.epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload.

        A payload of None writes nothing and a string payload is written
        as-is, mirroring _handle_multipart().  A single non-list payload is
        treated as a one-element list.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if isinstance(payload, str):
            # e.g. the message was parsed without descending into subparts,
            # so the body is still raw text.
            self._fp.write(payload)
            return
        if not isinstance(payload, list):
            # Scalar payload
            payload = [payload]
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in payload:
            text = self._flatten_to_string(part)
            # Strip off the unnecessary trailing empty line
            if text.endswith('\n'):
                text = text[:-1]
            blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part.

        A list payload has its first element flattened and written; a string
        payload is written as-is; a payload of None writes nothing.
        """
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if payload is None:
            # No body was ever set; there is nothing to write.
            return
        if isinstance(payload, list):
            payload = self._flatten_to_string(msg.get_payload(0))
        self._fp.write(payload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000262
263
264
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, but every non-text part is
    replaced by a one-line description built from a format string.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one extra optional argument.

        The generator walks through all subparts of a message.  A subpart
        whose main type is `text' has its payload printed, as returned by
        get_payload(decode=False).  Subparts whose main type is `multipart'
        are skipped.

        For any other subpart, fmt is a format string that is printed in
        place of the payload.  It is expanded with these keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print one entry for each text or non-multipart part under msg."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output of their own; walk() visits
                # their subparts separately.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Anything else is summarized through the format string.  Missing
            # values fall back to the bracketed placeholders below.
            substitutions = {
                'type': part.get_content_type(),
                'maintype': part.get_content_maintype(),
                'subtype': part.get_content_subtype(),
                'filename': part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding': part.get('Content-Transfer-Encoding',
                                     '[no encoding]'),
            }
            print(self._fmt % substitutions, file=self)
320
321
322
323# Helper
# The random token is zero-padded to the number of digits in sys.maxsize - 1,
# so every generated boundary has the same length.
_width = len(repr(sys.maxsize - 1))
_fmt = '%%0%dd' % _width


def _make_boundary(text=None):
    """Craft a random multipart boundary string.

    The boundary is fifteen '=' characters, a zero-padded random number and
    a trailing '=='.  When text is given, the result is guaranteed not to
    appear in it as a delimiter line ('--' + boundary, optionally followed
    by '--'); '.0', '.1', ... is appended until no such line is found.
    """
    token = random.randrange(sys.maxsize)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    candidate = boundary
    suffix = 0
    # Keep trying numbered variants while the candidate collides with a line
    # of text that would be read as a delimiter or close-delimiter.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = boundary + '.' + str(suffix)
        suffix += 1
    return candidate