blob: 1352ede9e1071b550adae1d94e3570b198cfe6f4 [file] [log] [blame]
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
4
"""Classes to generate plain text from a message object tree."""

# Public names exported by a star-import of this module.
__all__ = ['Generator', 'DecodedGenerator']
8
9import re
10import sys
11import time
12import random
13import warnings
14
15from io import StringIO
16from email.header import Header
17
# Separator used when building ``_handle_<maintype>_<subtype>`` method names.
UNDERSCORE = '_'
# Line separator used when joining flattened message parts.
NL = '\n'

# Matches a Unix mbox "From " delimiter at the start of any line of a body.
fcre = re.compile(r'^From ', flags=re.M)
22
23
24
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print(ufrom, file=self._fp)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific _handle_*() method available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not valid in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            print('%s:' % h, end=' ', file=self._fp)
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print(v, file=self._fp)
            elif isinstance(v, Header):
                # Header instances know what to do
                print(v.encode(), file=self._fp)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h, continuation_ws='\t')
                print(header.encode(), file=self._fp)
        # A blank line always separates headers from body
        print(file=self._fp)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, mangling From_ lines if enabled.

        Writes nothing when the payload is None.  Raises TypeError when the
        payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        # `basestring' does not exist in Python 3; text payloads are str.
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart message: preamble, delimited parts, epilogue."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each part into its own buffer with a cloned generator
            # so that the part's text can be inspected before it is written.
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print(msg.preamble, file=self._fp)
        # dash-boundary transport-padding CRLF
        print('--' + boundary, file=self._fp)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print('\n--' + boundary, file=self._fp)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print(file=self._fp)
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message contained in a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
253
254
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Write the body text for msg and all of its subparts.

        Text parts are written decoded; multipart containers are skipped
        (their children are visited by walk()); every other part is replaced
        by the expanded format string.
        """
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                payload = part.get_payload(decode=True)
                if isinstance(payload, bytes):
                    # A decoded payload may be bytes; printing bytes would
                    # emit its b'...' repr, so turn it into text using the
                    # part's declared charset first.
                    charset = part.get_content_charset('us-ascii')
                    try:
                        payload = payload.decode(charset, 'replace')
                    except LookupError:
                        # Unknown charset name: fall back to ASCII.
                        payload = payload.decode('us-ascii', 'replace')
                print(payload, file=self)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print(self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }, file=self)
310
311
312
# Helper
# sys.maxint was removed in Python 3; sys.maxsize is used instead.
# The random token is zero-padded to the width of the largest possible value.
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width


def _make_boundary(text=None):
    """Return a random MIME boundary string.

    If text is given, the returned boundary is guaranteed not to appear in
    text as a delimiter line (`--boundary') or close-delimiter line
    (`--boundary--'); a `.N' suffix is appended until it is unique.
    """
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    token = random.randrange(sys.maxsize)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while True:
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
332 return b