blob: e8bce102ad2f3293f18af5c585fd17a3b22579eb [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
7import time
8import re
9import random
10
Barry Warsawb1c1de32002-09-10 16:13:45 +000011from types import ListType
Barry Warsawba925802001-09-23 03:17:28 +000012from cStringIO import StringIO
13
Barry Warsaw062749a2002-06-28 23:41:42 +000014from email.Header import Header
15
Barry Warsawb1c1de32002-09-10 16:13:45 +000016try:
17 from email._compat22 import _isstring
18except SyntaxError:
19 from email._compat21 import _isstring
20
21
# Small string constants used when assembling generator output.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
# Joins the main type and subtype when Generator._dispatch builds a
# _handle_<maintype>_<subtype> method name.
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
# Eight spaces; Generator._split_header substitutes this for each hard tab
# before measuring header line lengths.
SPACE8 = ' ' * 8

# Matches the literal 'From ' at the beginning of any line (re.MULTILINE);
# Generator._handle_text uses it to rewrite such lines as '>From '.
fcre = re.compile(r'^From ', re.MULTILINE)
32
33
Barry Warsawe968ead2001-10-04 17:05:11 +000034
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is re-folded by
        email.Header.Header using a hard tab as continuation whitespace (see
        _split_header()).  Set to zero to disable wrapping headers.  Default
        is 78, as recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object.  Having a write() method also
        # lets the generator itself be the target of `print >> self'.
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # Craft a delimiter stamped with the current local time
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        Returns a new instance of this object's class that writes to fp and
        carries over the mangle_from_ and maxheaderlen settings.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            #
            # NOTE(review): within this class __first is set to 1 in
            # __init__() and never cleared, so this test never skips a header.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(h, text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, name, text):
        """Return text folded to maxheaderlen, or unchanged if it already fits.

        text is the complete `Name: value' header.  name is accepted for the
        caller's convenience but is not used here.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        h = Header(text, maxlinelen=maxheaderlen,
                   # For backwards compatibility, we use a hard tab here
                   continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg.

        Nothing is written when the payload is None.  Raises TypeError when
        the (possibly charset-encoded) payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            # Escape body lines starting with `From ' as `>From '
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the preamble, boundary-separated subparts and epilogue."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each subpart into its own buffer with a cloned generator
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
            # The first boundary has to start on a line of its own.  If a
            # non-empty preamble does not end with a newline, supply one,
            # just as is done for the epilogue below.
            if msg.preamble and not msg.preamble.endswith('\n'):
                print >> self._fp
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma keeps print from adding a newline after the
        # closing boundary
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each header block of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message object contained in msg's payload."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=0)
        self._fp.write(s.getvalue())
289
290
Barry Warsawe968ead2001-10-04 17:05:11 +0000291
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Behaves like the Generator base class, except that every non-text part is
    replaced by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one extra optional argument, fmt.

        The generator walks through all subparts of a message.  A subpart
        whose main type is `text' has its decoded payload printed.

        For any other subpart, fmt is printed in place of the payload.  fmt is
        a format string expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        default_fmt = ('[Non-text (%(type)s) part of message omitted, '
                       'filename %(filename)s]')
        if fmt is None:
            self._fmt = default_fmt
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for other leaf parts."""
        for subpart in msg.walk():
            main = subpart.get_main_type('text')
            if main == 'text':
                print >> self, subpart.get_payload(decode=1)
                continue
            if main == 'multipart':
                # Multipart parts print nothing themselves; walk() supplies
                # the parts that do
                continue
            # Anything else is summarised through the format string
            fields = {
                'type'       : subpart.get_type('[no MIME type]'),
                'maintype'   : subpart.get_main_type('[no main MIME type]'),
                'subtype'    : subpart.get_subtype('[no sub-MIME type]'),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % fields
345
346
Barry Warsawe968ead2001-10-04 17:05:11 +0000347
Barry Warsawba925802001-09-23 03:17:28 +0000348# Helper
def _make_boundary(text=None):
    """Craft a random MIME boundary string.

    The boundary is fifteen `=' characters, eighteen random decimal digits
    and a trailing `=='.  If text is given, the returned boundary is
    guaranteed not to appear in text as a boundary line (`--boundary' or
    `--boundary--' on a line of its own); a `.N' counter suffix is appended
    as often as needed to make that true.
    """
    # Build the token from integers rather than from the repr() of a float:
    # a float repr can be in exponent form (no `.' to split on, or stray
    # `e-05' characters) and has no fixed width.  Two draws below 10**9 keep
    # each value inside a plain int.
    token = '%09d%09d' % (random.randrange(1000000000),
                          random.randrange(1000000000))
    boundary = ('=' * 15) + token + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # Look for the candidate as an opening or closing boundary line
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
363 return b