blob: c31dc803ec7de694a7ead0af14b34fe138f9b374 [file] [log] [blame]
Barry Warsawba925802001-09-23 03:17:28 +00001# Copyright (C) 2001 Python Software Foundation
2# Author: barry@zope.com (Barry Warsaw)
3
4"""Classes to generate plain text from a message object tree.
5"""
6
7import time
8import re
9import random
10
11from types import ListType, StringType
12from cStringIO import StringIO
13
14# Intrapackage imports
15import Message
16import Errors
17
# String constants shared by the generator classes below.
SEMISPACE = '; '
BAR = '|'
# Glue used when turning a MIME type into a `_handle_*' method name.
UNDERSCORE = '_'
NL = '\n'
# Joins the pieces of a header that was split on semicolons: each piece
# gets its semicolon back and the next piece starts on a tab-indented
# continuation line.
SEMINLTAB = ';\n\t'
# Replacement for a hard tab when measuring header line widths.
SPACE8 = 8 * ' '

# Matches the text `From ' at the beginning of any line of a body; used to
# escape such lines when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.M)
26
27
28
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces), than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        then the header is left alone.  Set to zero to disable wrapping
        headers.  Default is 78, as recommended (but not required) by RFC
        2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # NOTE: nothing in this class ever resets this flag, so the
        # MIME-Version: filter in _write_headers() currently never fires.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            # Only write() is required of the output object, so use it
            # directly rather than going through the print machinery.
            self._fp.write(ufrom + '\n')
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write the headers of msg followed by its flattened body."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output object, even if a handler
            # raised while the temporary buffer was installed.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that best matches the content type of msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.  Dashes are not legal in
            # method names, so they are mapped to underscores.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            self._fp.write(text + '\n')
        # A blank line always separates headers from body
        self._fp.write('\n')

    def _split_header(self, text):
        """Return text with over-long lines folded at semicolons.

        text is a complete `Name: value' header, which may already contain
        embedded newlines (continuation lines).  Each physical line whose
        width, with hard tabs counted as 8 spaces, exceeds maxheaderlen is
        broken at semicolons; the pieces are rejoined with a semicolon, a
        newline and a tab.  A line that is too long but has no usable
        semicolon is left alone.  Lines that already fit are returned
        untouched, so a header that needs no folding comes back unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        rtn = []
        for line in text.split('\n'):
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                rtn.append(line)
                continue
            # Try to break this line (and only this line) on semicolons,
            # but if that doesn't work, then just leave it alone.
            pieces = []
            while len(line) > maxheaderlen:
                i = line.rfind(';', 0, maxheaderlen)
                if i < 0:
                    break
                pieces.append(line[:i])
                line = line[i+1:].lstrip()
            # Whatever is left (possibly still too long) is the last piece.
            pieces.append(line)
            rtn.append(SEMINLTAB.join(pieces))
        # Pre-existing line breaks are restored as plain newlines; only the
        # breaks introduced above get a semicolon put back.
        return NL.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, mangling From_ lines if enabled.

        A payload of None writes nothing.  Raises TypeError when the payload
        is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write the subparts of msg separated by its boundary string.

        When isdigest is true an extra blank line is written after every
        boundary line that introduces a part.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        self._fp.write('--' + boundary + '\n')
        if isdigest:
            self._fp.write('\n')
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The closing boundary deliberately has no trailing newline.
        self._fp.write('\n--' + boundary + '--')
        # Write out any epilogue
        if msg.epilogue is not None:
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write msg as a multipart with the multipart/digest spacing."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each payload part of msg as a block of text, with the
        blocks separated by a single empty line.
        """
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened text of the message object nested in msg."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        g(msg.get_payload(), unixfrom=0)
        self._fp.write(s.getvalue())
279
280
281
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Works like the Generator base class, but every non-text part is
    replaced by a line produced from a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() but with one more optional argument.

        The generator walks through all subparts of a message.  For a
        subpart whose main type is `text', the decoded payload of the
        subpart is printed.

        For any other subpart, fmt is used in place of the payload.  It is
        a format string expanded with these keywords (written in
        %(keyword)s form):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format string is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print one entry for every part found by walking msg."""
        for part in msg.walk():
            # A part with no main type is treated as text.
            maintype = part.get_main_type('text')
            if maintype == 'multipart':
                # Just skip this
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=1)
                continue
            # Anything else is summarized through the format string; each
            # keyword falls back to a bracketed placeholder.
            fields = {
                'type'       : part.get_type('[no MIME type]'),
                'maintype'   : part.get_main_type('[no main MIME type]'),
                'subtype'    : part.get_subtype('[no sub-MIME type]'),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % fields
335
336
337
# Helper
def _make_boundary(text=None):
    """Craft a random MIME boundary string.

    The boundary is fifteen `=' characters, the digits after the decimal
    point of a random float, and a closing `=='.

    If text is given, the returned boundary is guaranteed not to occur in
    it as a boundary line, i.e. as a line consisting of `--' + boundary
    with an optional trailing `--'.  On a clash, `.0', `.1', ... is
    appended to the boundary until an unused candidate is found.  If text
    is None no check is made.
    """
    boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # Only a full line counts as a clash; the boundary string showing
        # up in the middle of a line is harmless.
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
353 return b