blob: a8e2cfe57d34154526f6c924737636d53ed29094 [file] [log] [blame]
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Classes to generate plain text from a message object tree.
"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
Barry Warsaw062749a2002-06-28 23:41:42 +000014from email.Header import Header
15
# Small string constants shared by the generators below.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = NL + '\t'
SEMINLTAB = ';' + NLTAB
SPACE8 = 8 * ' '

# Matches the literal "From " at the start of any line of a (multi-line)
# string; used to find body lines that need From_ mangling.
fcre = re.compile(r'^From ', re.M)
26
27
Barry Warsawe968ead2001-10-04 17:05:11 +000028
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is handed to
        email.Header.Header for folding, using a hard tab as the continuation
        whitespace.  Set to zero to disable wrapping headers.  Default is 78,
        as recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (body first into a buffer, then headers, then body)."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg's type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        ctype = msg.get_content_type()
        main, sub = ctype.split('/')
        # Dashes are not legal in method names, so map them to underscores
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    def _flatten_part(self, part):
        """Return the flattened text of part, produced by a clone of self."""
        s = StringIO()
        g = self.clone(s)
        # Subobjects never get a Unix From_ line
        g.flatten(part, unixfrom=0)
        return s.getvalue()

    def _ensure_boundary(self, msg, text=None):
        """Return msg's boundary, creating and storing one if it has none.

        When a new boundary has to be made and text is given, the boundary is
        chosen so that it does not appear as a delimiter line in text.
        """
        # A private sentinel tells "no boundary" apart from any real value
        missing = []
        boundary = msg.get_boundary(failobj=missing)
        if boundary is missing:
            boundary = _make_boundary(text)
            # Only set the boundary when we had to calculate one.  We don't
            # do it unconditionally because, while set_boundary() preserves
            # order, it doesn't preserve newlines/continuations in headers.
            # This is no big deal in practice, but turns out to be
            # inconvenient for the unittest suite.
            msg.set_boundary(boundary)
        return boundary

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            # NOTE(review): nothing in this class ever clears __first after
            # __init__() sets it, so this test never skips a header here.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(h, text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, name, text):
        """Return text folded to maxheaderlen, or unchanged if it fits.

        text is the complete header including the field name; name is
        accepted for interface compatibility but is not used here.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        h = Header(text, maxlinelen=maxheaderlen,
                   # For backwards compatibility, we use a hard tab here
                   continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, body-encoded and From_-mangled.

        Raises TypeError if the (possibly charset-encoded) payload is not a
        string.  A payload of None writes nothing.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write preamble, boundary-delimited subparts, and epilogue of msg."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached.  If we have to invent a
            # boundary, record it on the message too: the headers are written
            # after this body, and they must name the boundary used here.
            boundary = self._ensure_boundary(msg)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        msgtexts = [self._flatten_part(part) for part in subparts]
        # Now make sure that a boundary we select ourselves doesn't appear in
        # any of the message texts.  An existing boundary is used as is.
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = self._ensure_boundary(msg, NL.join(msgtexts))
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the end delimiter
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each block of a delivery status payload, newline-joined."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            text = self._flatten_part(part)
            # Strip off the unnecessary trailing empty line
            if text.endswith('\n'):
                text = text[:-1]
            blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message object contained in msg's payload."""
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        self._fp.write(self._flatten_part(msg.get_payload(0)))
280
281
Barry Warsawe968ead2001-10-04 17:05:11 +0000282
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Works like the Generator base class, except that each non-text part is
    replaced by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one additional optional argument.

        The generator walks through all subparts of a message.  For a subpart
        whose main type is `text', the decoded payload of the subpart is
        printed.

        For any other subpart, fmt is a format string that is printed in
        place of the payload.  fmt is expanded with the following keywords
        (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        default_fmt = ('[Non-text (%(type)s) part of message omitted, ' +
                       'filename %(filename)s]')
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = default_fmt

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for other leaf parts."""
        for part in msg.walk():
            maintype = part.get_main_type('text')
            if maintype == 'text':
                print >> self, part.get_payload(decode=1)
                continue
            if maintype == 'multipart':
                # Containers produce no output of their own; just skip them
                continue
            # Anything else is summarized through the format string
            substitutions = {
                'type'       : part.get_type('[no MIME type]'),
                'maintype'   : part.get_main_type('[no main MIME type]'),
                'subtype'    : part.get_subtype('[no sub-MIME type]'),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % substitutions
336
337
Barry Warsawe968ead2001-10-04 17:05:11 +0000338
Barry Warsawba925802001-09-23 03:17:28 +0000339# Helper
def _make_boundary(text=None):
    """Craft a random MIME boundary string.

    The boundary is 15 `=' characters, 17 random decimal digits, and `=='.
    If text is given, ensure that the chosen boundary doesn't appear in the
    text as a delimiter line (`--boundary' or `--boundary--'); on a clash,
    `.0', `.1', ... is appended to the base boundary until a free one is
    found.  If text is None, the boundary is returned unchecked.
    """
    # Fixed-point formatting always yields `0.' plus exactly 17 digits.
    # repr() of a small float switches to exponent notation (e.g. `1e-05'),
    # which may contain no `.' at all and would break the split below.
    digits = ('%.17f' % random.random()).split('.')[1]
    boundary = ('=' * 15) + digits + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # Look for the candidate as a whole line, optionally as the closing
        # delimiter with its trailing `--'
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
354 return b