blob: 7f05218d48599493f01d278556ef442dafdc8811 [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
7import time
8import re
9import random
10
Barry Warsawb1c1de32002-09-10 16:13:45 +000011from types import ListType
Barry Warsawba925802001-09-23 03:17:28 +000012from cStringIO import StringIO
13
Barry Warsaw062749a2002-06-28 23:41:42 +000014from email.Header import Header
15
Barry Warsawb1c1de32002-09-10 16:13:45 +000016try:
17 from email._compat22 import _isstring
18except SyntaxError:
19 from email._compat21 import _isstring
20
# Provide True/False on interpreters that do not define them as builtins:
# evaluating the bare names raises NameError there, and the integer
# equivalents are bound at module level instead.
try:
    True, False
except NameError:
    True = 1
    False = 0

# String constants.  Within this module UNDERSCORE is used to build handler
# method names in Generator._dispatch(), NL to join flattened parts, and
# SPACE8 to expand hard tabs when measuring header line lengths.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches the text 'From ' at the start of any line (MULTILINE); used by
# Generator._handle_text() to escape such lines as '>From '.
fcre = re.compile(r'^From ', re.MULTILINE)
37
38
Barry Warsawe968ead2001-10-04 17:05:11 +000039
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is folded by
        email.Header.Header using a hard tab as the continuation whitespace.
        Set to zero to disable wrapping headers.  Default is 78, as
        recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No envelope header recorded on the message; craft one
                # stamped with the current time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        Returns a new instance of this generator's class that writes to fp
        and uses the same mangle_from_ and maxheaderlen settings.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers, then body) to the current output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in Python identifiers, so they are mapped to
        # underscores when building the handler name.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            # This is only a cheap pre-check on the whole header text;
            # _split_header() re-examines the individual lines.
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, text):
        """Return text folded to maxheaderlen, or unchanged if no single
        line of it is actually too long.

        text is the complete header, field name included.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        h = Header(text, maxlinelen=maxheaderlen,
                   # For backwards compatibility, we use a hard tab here
                   continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg.

        The payload is body-encoded first when the message has a charset,
        and From_ lines are escaped when mangle_from_ is set.  Nothing is
        written for a payload of None.  Raises TypeError if the payload is
        not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the body of a multipart message.

        Each subpart is flattened separately with a clone of this generator
        and the results are joined with boundary delimiter lines, surrounded
        by the message's preamble and epilogue when those are not None.  If
        the message has no boundary parameter, one is generated and stored
        on the message with set_boundary() so that the headers, which are
        written after the body, agree with the delimiters used here.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary()
            if boundary is None:
                # Record the generated boundary on the message; otherwise the
                # Content-Type: header would not name the boundary that the
                # delimiter lines below use.
                boundary = _make_boundary()
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if boundary is None:
            # Only now do we need a new boundary, so only now pay for making
            # sure it doesn't appear in any of the message texts.
            #
            # We don't call set_boundary() unconditionally because, while it
            # preserves order, it doesn't preserve newlines/continuations in
            # headers.  This is no big deal in practice, but turns out to be
            # inconvenient for the unittest suite.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # delimiter; one is added below only if an epilogue needs it.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status part."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message contained in a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
288
289
Barry Warsawe968ead2001-10-04 17:05:11 +0000290
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print every text part decoded and a placeholder line for every
        other non-multipart part found while walking msg.
        """
        for subpart in msg.walk():
            main = subpart.get_main_type('text')
            if main == 'multipart':
                # Containers produce no output of their own; walk() yields
                # their children separately.
                continue
            if main == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Anything else is replaced by the expanded format string.
            fields = {
                'type'       : subpart.get_type('[no MIME type]'),
                'maintype'   : subpart.get_main_type('[no main MIME type]'),
                'subtype'    : subpart.get_subtype('[no sub-MIME type]'),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % fields
344
345
Barry Warsawe968ead2001-10-04 17:05:11 +0000346
Barry Warsawba925802001-09-23 03:17:28 +0000347# Helper
def _make_boundary(text=None):
    """Craft a random MIME boundary string.

    The boundary consists of fifteen '=' characters, eighteen decimal digits
    and a trailing '=='.  If text is given, the returned boundary is
    guaranteed not to occur in it as a delimiter line (that is, '--' plus
    the boundary, optionally followed by '--', alone on a line): a '.N'
    suffix with N counting up from 0 is appended until no such line exists.
    """
    # Build the random digits from integers.  Taking the part after the '.'
    # of repr(random.random()) breaks whenever the repr uses exponent
    # notation: '5e-05' has no '.' at all (IndexError), and other small
    # values would leak 'e-05' into the boundary.
    digits = '%09d%09d' % (random.randrange(10 ** 9),
                           random.randrange(10 ** 9))
    boundary = ('=' * 15) + digits + '=='
    if text is None:
        return boundary
    candidate = boundary
    counter = 0
    while True:
        cre = re.compile('^--' + re.escape(candidate) + '(--)?$',
                         re.MULTILINE)
        if not cre.search(text):
            break
        candidate = boundary + '.' + str(counter)
        counter += 1
    return candidate