blob: dbbcabc3fd9a7f97d1ecbe14cd4763825f047b9a [file] [log] [blame]
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Classes to generate plain text from a message object tree.
"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
# String constants shared by the generators below.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
# Continuation separators used when folding long headers.
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
# A hard tab counts as eight columns when measuring header width.
SPACE8 = ' ' * 8

# Matches "From " at the start of any line, so body lines that look like a
# Unix mailbox delimiter can be escaped.
fcre = re.compile(r'^From ', re.MULTILINE)
24
25
Barry Warsawe968ead2001-10-04 17:05:11 +000026
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        the header is broken on whitespace instead; a header that consists of
        a name followed by one unbreakable token is left alone.  Set to zero
        to disable wrapping headers.  Default is 78, as recommended (but not
        required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # Consulted by _write_headers().  NOTE: nothing in this class ever
        # clears this flag, so it is always true.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self._fp.write(ufrom + '\n')
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
            return
        # We do have a Content-Type: header.  Method names can't contain
        # `/' or `-', so both are mapped to underscores.
        specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = msg.get_main_type().replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    def _flatten_part(self, part):
        # Render part, without a Unix From_ line, using a fresh generator of
        # our own class and options, and return the resulting text.
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        g(part, unixfrom=0)
        return s.getvalue()

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        # Write every header of msg, one per line (folded if too long),
        # followed by the blank line that ends the header block.
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            self._fp.write(text + '\n')
        # A blank line always separates headers from body
        self._fp.write('\n')

    def _split_header(self, text):
        # Return text (a complete "Name: value" header, possibly already
        # containing continuation lines) folded so that, where possible, no
        # physical line is wider than maxheaderlen.
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        rtn = []
        for line in text.split('\n'):
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                rtn.append(line)
                continue
            splitline = []
            oldlen = len(line)
            # Try to break the line on semicolons, but if that doesn't
            # work, try to split on folding whitespace.
            while len(line) > maxheaderlen:
                i = line.rfind(';', 0, maxheaderlen)
                if i < 0:
                    break
                splitline.append(line[:i])
                line = line[i+1:].lstrip()
            if len(line) != oldlen:
                # Splitting on semis worked
                splitline.append(line)
                rtn.append(SEMINLTAB.join(splitline))
                continue
            # Splitting on semis didn't help, so try to split on
            # whitespace.  The capturing group keeps the whitespace runs, so
            # parts alternates token, whitespace, token, ... and always has
            # an odd length.
            parts = re.split(r'(\s+)', line)
            # Watch out though for "Header: longnonsplittableline"
            if parts[0].endswith(':') and len(parts) == 3:
                rtn.append(line)
                continue
            first = parts.pop(0)
            sublines = [first]
            acc = len(first)
            while parts:
                # parts now holds (whitespace, token) pairs
                len0 = len(parts[0])
                len1 = len(parts[1])
                if acc + len0 + len1 < maxheaderlen:
                    sublines.append(parts.pop(0))
                    sublines.append(parts.pop(0))
                    acc += len0 + len1
                else:
                    # Split it here, but don't forget to ignore the
                    # next whitespace-only part
                    splitline.append(EMPTYSTRING.join(sublines))
                    del parts[0]
                    first = parts.pop(0)
                    sublines = [first]
                    acc = len(first)
            splitline.append(EMPTYSTRING.join(sublines))
            rtn.append(NLTAB.join(splitline))
        return NL.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write the (string) payload of msg, encoded by the message's charset
        # if it has one, and with From_ lines escaped if mangling is on.
        # Raises TypeError if the payload is not a string.
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        #
        # A private sentinel tells "the message has no boundary" apart from
        # any real boundary value, so that a new boundary is only crafted
        # (and the text only scanned) when one is actually needed.
        no_boundary = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=no_boundary)
            if boundary is no_boundary:
                boundary = _make_boundary()
                # Record it, so the Content-Type: header written later
                # agrees with the delimiters written here.
                msg.set_boundary(boundary)
            self._fp.write('--' + boundary + '\n')
            self._fp.write('\n\n')
            self._fp.write('--' + boundary + '--\n')
            return
        if not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        msgtexts = [self._flatten_part(part) for part in subparts]
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=no_boundary)
        # Only set a boundary when we had to calculate a new one.  We don't
        # do it unconditionally because, while set_boundary() preserves
        # order, it doesn't preserve newlines/continuations in headers.  This
        # is no big deal in practice, but turns out to be inconvenient for
        # the unittest suite.
        if boundary is no_boundary:
            # Make sure the boundary we select doesn't appear in any of the
            # message texts.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        self._fp.write('--' + boundary + '\n')
        if isdigest:
            self._fp.write('\n')
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The closing delimiter is deliberately written without a trailing
        # newline; the epilogue handling below supplies one if needed.
        self._fp.write('\n--' + boundary + '--')
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                self._fp.write('\n')
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            text = self._flatten_part(part)
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        self._fp.write(self._flatten_part(msg.get_payload()))
326
327
Barry Warsawe968ead2001-10-04 17:05:11 +0000328
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def _dispatch(self, msg):
        # Instead of dispatching on the MIME type, walk the whole tree and
        # write one line of output per non-multipart part.
        for part in msg.walk():
            # Parts without a main type are treated as text.
            maintype = part.get_main_type('text')
            if maintype == 'text':
                self.write(str(part.get_payload(decode=1)) + '\n')
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                text = self._fmt % {
                    'type'       : part.get_type('[no MIME type]'),
                    'maintype'   : part.get_main_type('[no main MIME type]'),
                    'subtype'    : part.get_subtype('[no sub-MIME type]'),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
                self.write(text + '\n')
382
383
Barry Warsawe968ead2001-10-04 17:05:11 +0000384
Barry Warsawba925802001-09-23 03:17:28 +0000385# Helper
Barry Warsaw409a4c02002-04-10 21:01:31 +0000386def _make_boundary(text=None):
Barry Warsawba925802001-09-23 03:17:28 +0000387 # Craft a random boundary. If text is given, ensure that the chosen
388 # boundary doesn't appear in the text.
389 boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
390 if text is None:
391 return boundary
392 b = boundary
393 counter = 0
394 while 1:
395 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
396 if not cre.search(text):
397 break
398 b = boundary + '.' + str(counter)
399 counter += 1
400 return b