blob: 56d44ea521765ba8abfce58b33bb50bd65448430 [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsawdb6888b2003-05-29 19:39:33 +00008import sys
Barry Warsaw5d384ef2003-03-06 05:22:02 +00009import time
10import locale
Barry Warsawba925802001-09-23 03:17:28 +000011import random
12
Barry Warsaw6c2bc462002-10-14 15:09:30 +000013from types import ListType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000014from cStringIO import StringIO
15
Barry Warsaw062749a2002-06-28 23:41:42 +000016from email.Header import Header
Barry Warsaw5d384ef2003-03-06 05:22:02 +000017from email.Parser import NLCRE
Barry Warsaw062749a2002-06-28 23:41:42 +000018
Barry Warsawb1c1de32002-09-10 16:13:45 +000019try:
20 from email._compat22 import _isstring
21except SyntaxError:
22 from email._compat21 import _isstring
23
Barry Warsaw56835dd2002-09-28 18:04:55 +000024try:
25 True, False
26except NameError:
27 True = 1
28 False = 0
Barry Warsawb1c1de32002-09-10 16:13:45 +000029
# String fragments shared by the generator code below.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = NL + '\t'
SEMINLTAB = ';' + NLTAB
SPACE8 = '        '

# Matches the text "From " at the beginning of any line; used to escape such
# lines in message bodies when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.M)
40
Barry Warsaw6c2bc462002-10-14 15:09:30 +000041def _is8bitstring(s):
42 if isinstance(s, StringType):
43 try:
44 unicode(s, 'us-ascii')
45 except UnicodeError:
46 return True
47 return False
48
Barry Warsawba925802001-09-23 03:17:28 +000049
Barry Warsawe968ead2001-10-04 17:05:11 +000050
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the generator's current output file."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No stored delimiter, so craft one stamped with the
                # current time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        Returns a new instance of this generator's class that writes to fp
        and is constructed with this generator's mangle_from_ and
        maxheaderlen settings.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file.

        The body is generated first, into a temporary buffer, and the headers
        are written only afterwards; see the comment below for why.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line.

        Header values are wrapped according to the maxheaderlen given to the
        constructor, except where wrapping is disabled or unsafe (see the
        branch comments).
        """
        for h, v in msg.items():
            # The trailing comma keeps the value on the same line as the
            # header name.
            print >> self._fp, '%s:' % h,
            if self.__maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self.__maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg to the output file.

        Nothing is written when the payload is None.  If the message has a
        charset, the payload is first passed through its body_encode().  When
        From_ mangling is enabled, lines starting with "From " are escaped
        with a leading `>'.

        Raises TypeError if the (possibly encoded) payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the preamble, boundary-delimited subparts and epilogue of a
        multipart message.

        A boundary that does not occur in any flattened subpart is chosen and,
        if it differs from the one currently on msg, stored on msg with
        set_boundary() so that the headers written afterwards agree with the
        body.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            # The headers are written only after this handler returns (see
            # _write()), so a freshly generated boundary has to be recorded
            # on the message; otherwise the delimiters written below would
            # not be declared in the Content-Type: header.
            if msg.get_boundary() != boundary:
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each subpart into its own buffer with a generator that
            # has the same options as this one.
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
            # If preamble is the empty string, the length of the split will be
            # 1, but the last element will be the empty string.  If it's
            # anything else but does not end in a line separator, the length
            # will be > 1 and not end in an empty string.  We need to
            # guarantee a newline after the preamble, but don't add too many.
            plines = NLCRE.split(msg.preamble)
            if plines != [''] and plines[-1] != '':
                self._fp.write('\n')
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # delimiter; the epilogue handling below decides whether to add one.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each block of a message/delivery-status payload, with the
        blocks separated by a single empty line.
        """
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened form of the message contained in msg."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
299
300
Barry Warsawe968ead2001-10-04 17:05:11 +0000301
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        # Generator stores maxheaderlen under a name-mangled attribute that
        # is private to that class, so keep a copy here for clone().
        self.__maxheaderlen = maxheaderlen
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, including fmt.

        The inherited clone() passes only mangle_from_ and maxheaderlen to
        the constructor, which would silently reset a custom fmt to the
        default.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen,
                              self._fmt)

    def _dispatch(self, msg):
        """Print the decoded payload of each text part of msg and the
        expanded format string for each other non-multipart part.
        """
        for part in msg.walk():
            maintype = part.get_main_type('text')
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_type('[no MIME type]'),
                    'maintype'   : part.get_main_type('[no main MIME type]'),
                    'subtype'    : part.get_subtype('[no sub-MIME type]'),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
355
356
Barry Warsawe968ead2001-10-04 17:05:11 +0000357
Barry Warsawba925802001-09-23 03:17:28 +0000358# Helper
Barry Warsawdb6888b2003-05-29 19:39:33 +0000359_width = len(repr(sys.maxint-1))
360_fmt = '%%0%dd' % _width
361
Barry Warsaw409a4c02002-04-10 21:01:31 +0000362def _make_boundary(text=None):
Barry Warsawba925802001-09-23 03:17:28 +0000363 # Craft a random boundary. If text is given, ensure that the chosen
364 # boundary doesn't appear in the text.
Barry Warsaw663219a2003-06-24 20:19:34 +0000365 token = random.randrange(sys.maxint)
Barry Warsawdb6888b2003-05-29 19:39:33 +0000366 boundary = ('=' * 15) + (_fmt % token) + '=='
Barry Warsawba925802001-09-23 03:17:28 +0000367 if text is None:
368 return boundary
369 b = boundary
370 counter = 0
Barry Warsaw56835dd2002-09-28 18:04:55 +0000371 while True:
Barry Warsawba925802001-09-23 03:17:28 +0000372 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
373 if not cre.search(text):
374 break
375 b = boundary + '.' + str(counter)
376 counter += 1
377 return b