blob: 981e0ffd04855ada5d93c19ccb9afb98e799ec2a [file] [log] [blame]
Barry Warsawba925802001-09-23 03:17:28 +00001# Copyright (C) 2001 Python Software Foundation
2# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
# Small string constants used throughout this module as joiners and
# separators when headers and bodies are written out.
EMPTYSTRING = ''
NL = '\n'
NLTAB = NL + '\t'
SEMISPACE = '; '
SEMINLTAB = ';' + NLTAB
BAR = '|'
UNDERSCORE = '_'
# Width used when a hard tab is expanded for header length measurement.
SPACE8 = 8 * ' '

# Matches the literal "From " at the start of any line of a multi-line
# string; used to find Unix-From lookalikes in message bodies.
fcre = re.compile(r'^From ', re.MULTILINE)
24
25
Barry Warsawe968ead2001-10-04 17:05:11 +000026
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        the line is broken on whitespace instead; a line that offers neither
        kind of break point is left alone.  Set to zero to disable wrapping
        headers.  Default is 78, as recommended (but not required) by RFC
        2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # NOTE(review): __first is set here and never cleared anywhere in
        # this class, so the MIME-Version test in _write_headers() never
        # actually suppresses the header.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file.

        The body is generated first (into a temporary buffer) and the headers
        second, but they are emitted in headers-then-body order.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg's type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line.

        Headers longer than maxheaderlen are folded with _split_header()
        unless wrapping was disabled with a non-positive maxheaderlen.
        """
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, text):
        """Return text with every over-long physical line folded.

        text is a complete "Name: value" header which may already contain
        continuation lines separated by newlines.  Each physical line is
        measured with hard tabs expanded to 8 spaces; lines that fit within
        maxheaderlen are kept verbatim and longer ones are folded on their
        own by _split_line().  If no line is too long, text is returned
        unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        lines = text.split('\n')
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.
        for line in lines:
            if len(line.replace('\t', SPACE8)) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        rtn = []
        for line in lines:
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                # Short lines can remain unchanged
                rtn.append(line)
            else:
                # Fold this line only; the lines around it are untouched, so
                # nothing already emitted is processed a second time.
                rtn.append(self._split_line(line))
        return NL.join(rtn)

    def _split_line(self, line):
        """Fold one over-long physical header line and return the result.

        Semicolons are tried first, producing pieces joined by ";\\n\\t".  If
        no semicolon break is possible the line is folded on whitespace
        instead, producing pieces joined by "\\n\\t".  A line of the form
        "Header: longnonsplittableline" is returned unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        # Try to break the line on semicolons, but if that doesn't work, try
        # to split on folding whitespace.
        pieces = []
        rest = line
        while len(rest) > maxheaderlen:
            i = rest.rfind(';', 0, maxheaderlen)
            if i < 0:
                break
            pieces.append(rest[:i])
            rest = rest[i+1:].lstrip()
        if pieces:
            # Splitting on semis worked
            pieces.append(rest)
            return SEMINLTAB.join(pieces)
        # Splitting on semis didn't help, so try to split on whitespace.  The
        # capturing group keeps the whitespace runs, so parts alternates
        # word, whitespace, word, ...
        parts = re.split(r'(\s+)', line)
        # Watch out though for "Header: longnonsplittableline"
        if parts[0].endswith(':') and len(parts) == 3:
            return line
        folded = []
        first = parts.pop(0)
        sublines = [first]
        acc = len(first)
        while parts:
            # parts now holds (whitespace, word) pairs
            len0 = len(parts[0])
            len1 = len(parts[1])
            if acc + len0 + len1 < maxheaderlen:
                sublines.append(parts.pop(0))
                sublines.append(parts.pop(0))
                acc += len0 + len1
            else:
                # Split it here, but don't forget to ignore the
                # next whitespace-only part
                folded.append(EMPTYSTRING.join(sublines))
                del parts[0]
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
        folded.append(EMPTYSTRING.join(sublines))
        return NLTAB.join(folded)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write msg's string payload, mangling From_ lines if enabled.

        A payload of None writes nothing.  Raises TypeError if the payload is
        neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write a multipart body: preamble, delimited parts, epilogue.

        When isdigest is true an extra blank line follows each boundary line
        that introduces a part.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # The joined part text is handed to _make_boundary() when crafting
        # the fallback boundary used if the message has none of its own.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary, set it on the message.  We
        # don't do it unconditionally because, while set_boundary() preserves
        # order, it doesn't preserve newlines/continuations in headers.  This
        # is no big deal in practice, but turns out to be inconvenient for
        # the unittest suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        if isdigest:
            print >> self._fp
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma keeps print from adding a newline after the
        # closing boundary.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write a multipart/digest body via _handle_multipart()."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each payload part as its own block, newline-separated."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single Message object that is msg's payload."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        g(msg.get_payload(), unixfrom=0)
        self._fp.write(s.getvalue())
308
309
Barry Warsawe968ead2001-10-04 17:05:11 +0000310
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Behaves like the Generator base class, but every part whose main type is
    neither `text' nor `multipart' is replaced by a line built from a format
    string describing the part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one extra optional argument.

        The generator walks through all subparts of a message.  For a subpart
        whose main type is `text' the decoded payload is printed.  For any
        other non-multipart subpart, fmt is printed in place of the payload
        after being expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print one line of output for each non-multipart part of msg."""
        for subpart in msg.walk():
            # A part with no main type is treated as text.
            main = subpart.get_main_type('text')
            if main == 'text':
                print >> self, subpart.get_payload(decode=1)
                continue
            if main == 'multipart':
                # Containers produce no output of their own.
                continue
            # Anything else is summarized through the format string.
            fields = {}
            fields['type'] = subpart.get_type('[no MIME type]')
            fields['maintype'] = subpart.get_main_type('[no main MIME type]')
            fields['subtype'] = subpart.get_subtype('[no sub-MIME type]')
            fields['filename'] = subpart.get_filename('[no filename]')
            fields['description'] = subpart.get('Content-Description',
                                                '[no description]')
            fields['encoding'] = subpart.get('Content-Transfer-Encoding',
                                             '[no encoding]')
            print >> self, self._fmt % fields
364
365
Barry Warsawe968ead2001-10-04 17:05:11 +0000366
Barry Warsawba925802001-09-23 03:17:28 +0000367# Helper
def _make_boundary(text=None):
    """Craft a random boundary string and return it.

    The boundary is fifteen `=' characters, the digits of a random float,
    and a trailing `=='.  If text is given, the result is guaranteed not to
    appear in text as a delimiter line ("--boundary" or "--boundary--" alone
    on a line): on a collision a numeric suffix ".0", ".1", ... is appended
    until the candidate is unused.

    This is a plain module-level function, so it takes no `self'; the text
    to check is its first (and only) argument.
    """
    # [-1] equals [1] whenever the repr contains a decimal point, and also
    # copes with a repr that has none (e.g. an exponent form like 1e-05).
    boundary = ('=' * 15) + repr(random.random()).split('.')[-1] + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # re.escape() because the candidate may contain regex
        # metacharacters such as `.'.
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
382 return b