blob: 64db084238c6f46a827010a5df5d14274bf7b47e [file] [log] [blame]
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Classes to generate plain text from a message object tree.
"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
# String constants shared by the generators below.  Most are used as
# joiners, e.g. NL.join(lines).
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
# Joiner for header continuation lines produced by folding on whitespace
NLTAB = '\n\t'
# Joiner for header continuation lines produced by folding on semicolons
SEMINLTAB = ';\n\t'
# Width used when measuring a hard tab in a header line
SPACE8 = ' ' * 8

# Matches "From " at the start of any line of a body (hence MULTILINE), so
# such lines can be escaped when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
24
25
Barry Warsawe968ead2001-10-04 17:05:11 +000026
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        the header is broken on whitespace instead; a header made of a single
        unsplittable token is left alone.  Set to zero to disable wrapping
        headers.  Default is 78, as recommended (but not required) by RFC
        2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self._fp.write(ufrom + NL)
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Route msg to the most specific body handler available."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.  Dashes are not legal in
            # method names, so they are mapped to underscores.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            # NOTE(review): __first is set to 1 in __init__ and nothing in
            # this class ever clears it, so as written this never skips.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            self._fp.write(text + NL)
        # A blank line always separates headers from body
        self._fp.write(NL)

    def _split_header(self, text):
        """Return text folded so that lines fit within maxheaderlen.

        text is a complete "Name: value" header which may already contain
        continuation lines.  Over-long lines are broken on semicolons when
        possible, otherwise on whitespace; a "Name: token" line with nothing
        to break on is returned as is.  If no line is over-long (measured
        with tabs expanded to 8 spaces), text is returned unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        rtn = []
        for line in text.split('\n'):
            splitline = []
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                splitline.append(line)
                rtn.append(SEMINLTAB.join(splitline))
            else:
                oldlen = len(line)
                # Try to break the line on semicolons, but if that doesn't
                # work, try to split on folding whitespace.
                while len(line) > maxheaderlen:
                    i = line.rfind(';', 0, maxheaderlen)
                    if i < 0:
                        break
                    splitline.append(line[:i])
                    line = line[i+1:].lstrip()
                if len(line) != oldlen:
                    # Splitting on semis worked
                    splitline.append(line)
                    rtn.append(SEMINLTAB.join(splitline))
                    continue
                # Splitting on semis didn't help, so try to split on
                # whitespace.  The capturing group keeps the whitespace runs,
                # so parts alternates token, whitespace, token, ...
                parts = re.split(r'(\s+)', line)
                # Watch out though for "Header: longnonsplittableline"
                if parts[0].endswith(':') and len(parts) == 3:
                    rtn.append(line)
                    continue
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
                # What remains is (whitespace, token) pairs
                while parts:
                    len0 = len(parts[0])
                    len1 = len(parts[1])
                    if acc + len0 + len1 < maxheaderlen:
                        sublines.append(parts.pop(0))
                        sublines.append(parts.pop(0))
                        acc += len0 + len1
                    else:
                        # Split it here, but don't forget to ignore the
                        # next whitespace-only part
                        splitline.append(EMPTYSTRING.join(sublines))
                        del parts[0]
                        first = parts.pop(0)
                        sublines = [first]
                        acc = len(first)
                splitline.append(EMPTYSTRING.join(sublines))
                rtn.append(NLTAB.join(splitline))
        return NL.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, encoded and From_-mangled.

        Raises TypeError if the payload (after any charset body encoding) is
        not a string.  A payload of None writes nothing.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write the subparts of msg separated by boundary lines.

        isdigest is a flag that, when true, adds the extra blank line that
        multipart/digest puts between each boundary and its body part.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            # If the boundary had to be generated, record it in the header
            # too (as is done for the non-empty case below); otherwise the
            # delimiter lines written here would not match the Content-Type:
            # header written afterwards.
            if msg.get_boundary() != boundary:
                msg.set_boundary(boundary)
            self._fp.write('--' + boundary + NL)
            self._fp.write(NL + NL)
            self._fp.write('--' + boundary + '--' + NL)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g.flatten(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        self._fp.write('--' + boundary + NL)
        if isdigest:
            self._fp.write(NL)
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The closing delimiter is deliberately not newline terminated
        self._fp.write('\n--' + boundary + '--')
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                self._fp.write(NL)
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write msg as a multipart with digest-style part separation."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each header block of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g.flatten(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message object contained in msg's payload."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write that out.
        g.flatten(msg.get_payload(0), unixfrom=0)
        self._fp.write(s.getvalue())
330
331
Barry Warsawe968ead2001-10-04 17:05:11 +0000332
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, which selects the format

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def _dispatch(self, msg):
        """Write one line per text or non-text part found by msg.walk()."""
        for part in msg.walk():
            # A part with no main type is treated as text
            maintype = part.get_main_type('text')
            if maintype == 'text':
                # '%s' stringifies whatever the decoded payload is, exactly
                # as printing it would.
                self.write('%s\n' % (part.get_payload(decode=1),))
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                text = self._fmt % {
                    'type'       : part.get_type('[no MIME type]'),
                    'maintype'   : part.get_main_type('[no main MIME type]'),
                    'subtype'    : part.get_subtype('[no sub-MIME type]'),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
                self.write(text + '\n')
386
387
Barry Warsawe968ead2001-10-04 17:05:11 +0000388
# Helper
def _make_boundary(text=None):
    """Craft a random multipart boundary string.

    The boundary is 15 `=' characters, 17 random decimal digits, and `=='.
    If text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line (`--boundary' or `--boundary--' alone on a
    line); on a clash `.0', `.1', ... is appended until it is unique.
    """
    # Fixed-point formatting always yields "0." followed by exactly 17
    # digits.  repr() of a small float switches to exponent notation
    # (e.g. '1e-05'), which may contain no '.' at all.
    digits = ('%.17f' % random.random()).split('.')[1]
    boundary = ('=' * 15) + digits + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
404 return b