# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Classes to generate plain text from a message object tree.
"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
Barry Warsaw062749a2002-06-28 23:41:42 +000014from email.Header import Header
15
# String constants shared by the generator classes below.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches "From " at the start of any line; used to escape Unix From_ lines
# that occur inside a message body.
fcre = re.compile(r'^From ', re.MULTILINE)
26
27
Barry Warsawe968ead2001-10-04 17:05:11 +000028
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is handed to
        email.Header.Header to be folded, using a hard tab as the
        continuation whitespace.  Set to zero to disable wrapping headers.
        Default is 78, as recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # NOTE(review): within this class __first is only ever assigned here,
        # so the MIME-Version: suppression in _write_headers() never
        # triggers -- confirm whether that is intended.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        fp is the output file object the new generator writes to.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Route msg to the most specific body handler available."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so use the default type, which must be
            # either text/plain or message/rfc822.
            ctype = msg.get_default_type()
            assert ctype in ('text/plain', 'message/rfc822')
        if ctype.count('/') != 1:
            # A malformed type such as "text" or "a/b/c" cannot be unpacked
            # into maintype/subtype (it used to raise ValueError here), so
            # treat the part as text/plain instead of aborting the flatten.
            ctype = 'text/plain'
        main, sub = ctype.split('/')
        # Dashes are not legal in method names, so map them to underscores
        specific = ('%s_%s' % (main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(h, text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, name, text):
        """Return text folded to maxheaderlen, or unchanged if it fits.

        text is the complete "Name: value" header.  name is accepted but not
        used here.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        h = Header(text, maxlinelen=maxheaderlen,
                   # For backwards compatibility, we use a hard tab here
                   continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, applying charset encoding and From_
        mangling as configured.

        Raises TypeError if the payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, boundary-separated parts, and
        epilogue.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each part into its own buffer with a cloned generator
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the end boundary
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each block of a message/delivery-status payload, separated
        by a newline.
        """
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message object contained in a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=0)
        self._fp.write(s.getvalue())
285
286
Barry Warsawe968ead2001-10-04 17:05:11 +0000287
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Works like the Generator base class, but every non-text part is replaced
    by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one extra optional argument.

        All subparts of a message are walked.  A subpart whose main type is
        `text' has its decoded payload printed.

        For any other subpart, fmt is used in place of the payload.  It is a
        format string expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for every other
        non-multipart part found by walking msg.
        """
        for subpart in msg.walk():
            kind = subpart.get_main_type('text')
            if kind == 'multipart':
                # Just skip this
                continue
            if kind == 'text':
                print >> self, subpart.get_payload(decode=1)
                continue
            # Anything else is summarized through the format string
            fields = {
                'type'       : subpart.get_type('[no MIME type]'),
                'maintype'   : subpart.get_main_type('[no main MIME type]'),
                'subtype'    : subpart.get_subtype('[no sub-MIME type]'),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % fields
341
342
Barry Warsawe968ead2001-10-04 17:05:11 +0000343
# Helper
def _make_boundary(text=None):
    """Craft a random MIME boundary string.

    The boundary is fifteen `=' characters, an 18-digit random decimal
    token, and a trailing `=='.

    If text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line (`--boundary' or `--boundary--' on a line of
    its own); a `.N' suffix with increasing N is appended until no such
    line is found.
    """
    # Build the token from random integers instead of slicing
    # repr(random.random()): the repr of a float is not guaranteed to
    # contain a '.' (e.g. '1e-05'), which made split('.')[1] raise
    # IndexError, and small values leaked exponent text such as 'e-05' into
    # the boundary.  Two 9-digit draws keep each value within a plain int.
    token = '%09d%09d' % (random.randrange(10 ** 9),
                          random.randrange(10 ** 9))
    boundary = ('=' * 15) + token + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # The pattern depends on the current candidate, so it has to be
        # rebuilt on every pass.
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
359 return b