blob: 3e578a2f745b6a57c9ff9a9e75c51ca4b251290e [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsawdb6888b2003-05-29 19:39:33 +00008import sys
Barry Warsaw5d384ef2003-03-06 05:22:02 +00009import time
10import locale
Barry Warsawba925802001-09-23 03:17:28 +000011import random
12
Barry Warsaw6c2bc462002-10-14 15:09:30 +000013from types import ListType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000014from cStringIO import StringIO
15
Barry Warsaw062749a2002-06-28 23:41:42 +000016from email.Header import Header
Barry Warsaw5d384ef2003-03-06 05:22:02 +000017from email.Parser import NLCRE
Barry Warsaw062749a2002-06-28 23:41:42 +000018
Barry Warsawb1c1de32002-09-10 16:13:45 +000019try:
20 from email._compat22 import _isstring
21except SyntaxError:
22 from email._compat21 import _isstring
23
# Compatibility shim: if the interpreter has no True/False names (the bare
# reference raises NameError), fall back to the integers 1 and 0.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Small string constants used as separators/joiners.  Within this module
# only UNDERSCORE (MIME type -> handler name) and NL (joining flattened
# parts) are referenced; the others are not used by the code visible here.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches "From " at the start of any line (MULTILINE); used to find body
# lines that must be escaped as ">From " when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
40
Barry Warsaw6c2bc462002-10-14 15:09:30 +000041def _is8bitstring(s):
42 if isinstance(s, StringType):
43 try:
44 unicode(s, 'us-ascii')
45 except UnicodeError:
46 return True
47 return False
48
Barry Warsawba925802001-09-23 03:17:28 +000049
Barry Warsawe968ead2001-10-04 17:05:11 +000050
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen is the maximum line length handed to
        email.Header.Header when a plain header value is wrapped; continuation
        lines start with a tab.  Header instances encode themselves, and byte
        strings containing 8-bit data are never split.  Set to zero to disable
        wrapping headers.  Default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No envelope header recorded; synthesize one with the
                # current local time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg's type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # The trailing comma suppresses the newline; the next print to
            # the same file then emits the single space after the colon.
            print >> self._fp, '%s:' % h,
            if self.__maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self.__maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, body-encoding and From_-mangling it.

        Raises TypeError if the (possibly charset-encoded) payload is not a
        string.  A payload of None writes nothing.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited parts and epilogue.

        May call msg.set_boundary() when the boundary that ends up being
        written differs from the one currently stored on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            # If the boundary had to be generated, record it on the message;
            # otherwise the Content-Type: header written afterwards would not
            # name the delimiter emitted below.  Guarded, like the general
            # case further down, so an existing header is left untouched.
            if msg.get_boundary() != boundary:
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each part into its own buffer with an identically
            # configured generator.
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
            # If preamble is the empty string, the length of the split will be
            # 1, but the last element will be the empty string.  If it's
            # anything else but does not end in a line separator, the length
            # will be > 1 and not end in an empty string.  We need to
            # guarantee a newline after the preamble, but don't add too many.
            plines = NLCRE.split(msg.preamble)
            if plines != [''] and plines[-1] != '':
                self._fp.write('\n')
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # Trailing comma: no newline after the closing delimiter yet.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each payload block, separated by single newlines."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened text of the first subpart of msg's payload."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
300
301
Barry Warsawe968ead2001-10-04 17:05:11 +0000302
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Behaves like the Generator base class, except that every non-text,
    non-multipart part is replaced by a line built from a format string.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one additional optional argument.

        All subparts of a message are walked.  For a subpart whose main type
        is `text', the decoded payload is printed.  Multipart containers
        themselves produce no output.

        For any other subpart, fmt is printed in place of the payload after
        being expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        Passing fmt=None (the default) selects

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print decoded text parts and placeholders for everything else."""
        for subpart in msg.walk():
            main = subpart.get_main_type('text')
            if main == 'multipart':
                # Containers produce nothing; walk() visits their children.
                continue
            if main == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Any other main type: substitute the placeholder line.
            substitutions = {
                'type'       : subpart.get_type('[no MIME type]'),
                'maintype'   : subpart.get_main_type('[no main MIME type]'),
                'subtype'    : subpart.get_subtype('[no sub-MIME type]'),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % substitutions
355 }
356
357
Barry Warsawe968ead2001-10-04 17:05:11 +0000358
Barry Warsawba925802001-09-23 03:17:28 +0000359# Helper
Barry Warsawdb6888b2003-05-29 19:39:33 +0000360_width = len(repr(sys.maxint-1))
361_fmt = '%%0%dd' % _width
362
Barry Warsaw409a4c02002-04-10 21:01:31 +0000363def _make_boundary(text=None):
Barry Warsawba925802001-09-23 03:17:28 +0000364 # Craft a random boundary. If text is given, ensure that the chosen
365 # boundary doesn't appear in the text.
Barry Warsaw663219a2003-06-24 20:19:34 +0000366 token = random.randrange(sys.maxint)
Barry Warsawdb6888b2003-05-29 19:39:33 +0000367 boundary = ('=' * 15) + (_fmt % token) + '=='
Barry Warsawba925802001-09-23 03:17:28 +0000368 if text is None:
369 return boundary
370 b = boundary
371 counter = 0
Barry Warsaw56835dd2002-09-28 18:04:55 +0000372 while True:
Barry Warsawba925802001-09-23 03:17:28 +0000373 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
374 if not cre.search(text):
375 break
376 b = boundary + '.' + str(counter)
377 counter += 1
378 return b