# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Classes to generate plain text from a message object tree.
"""

import time
import re
import random

from types import ListType, StringType
from cStringIO import StringIO

from email.Header import Header

# Shared string constants used when assembling output.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches "From " at the start of any line; used to mangle From_ lines in
# message bodies.
fcre = re.compile(r'^From ', re.MULTILINE)



class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to. It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header. When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is re-folded by
        email.Header.Header.encode(), using a hard tab as the continuation
        whitespace. Set to zero to disable wrapping headers. Default is 78,
        as recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree. If the original message
        has no From_ delimiter, a `standard' one is crafted. By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self._fp.write(ufrom + '\n')
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body. We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO. Then we write the
        # headers and the StringIO contents. That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers. First we see if the message object wants to
        # handle that itself. If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype(). If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype(). If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        # Write every header of msg as "Name: value", followed by the blank
        # line that separates headers from the body.
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message. Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            #
            # NOTE: nothing in this class clears __first after __init__()
            # sets it, so this check does not currently skip anything.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(h, text)
            self._fp.write(text + '\n')
        # A blank line always separates headers from body
        self._fp.write('\n')

    def _split_header(self, name, text):
        # Return text (a complete "Name: value" header) folded to fit within
        # maxheaderlen, or unchanged when no physical line is too long. The
        # name argument is accepted but not used.
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide. There could be continuation lines
        # that actually shorten it. Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        h = Header(text, maxlinelen=maxheaderlen,
                   # For backwards compatibility, we use a hard tab here
                   continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write the (string) payload of msg, body-encoding it with the
        # message's charset when one is set and mangling From_ lines when
        # requested. Raises TypeError if the payload is not a string.
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _ensure_boundary(self, msg, text=None):
        # Return the boundary to use for msg. An existing boundary is used
        # as-is. Only when the message has none do we craft a new one (one
        # that does not collide with text, when text is given) and store it
        # on the message so the Content-Type: header written later agrees
        # with the delimiters in the body.
        #
        # We don't call set_boundary() unconditionally because, while it
        # preserves header order, it doesn't preserve newlines/continuations
        # in headers. This is no big deal in practice, but turns out to be
        # inconvenient for the unittest suite.
        boundary = msg.get_boundary()
        if boundary is None:
            boundary = _make_boundary(text)
            msg.set_boundary(boundary)
        return boundary

    def _handle_multipart(self, msg, isdigest=0):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that any boundary we generate isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = self._ensure_boundary(msg)
            self._fp.write('--' + boundary + '\n')
            # Two newlines: an empty body part followed by the blank line
            # that precedes the closing delimiter.
            self._fp.write('\n\n')
            self._fp.write('--' + boundary + '--\n')
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g.flatten(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # If we have to generate a boundary, make sure it doesn't appear in
        # any of the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = self._ensure_boundary(msg, alltext)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        self._fp.write('--' + boundary + '\n')
        if isdigest:
            self._fp.write('\n')
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The closing delimiter is written without a trailing newline.
        self._fp.write('\n--' + boundary + '--')
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                self._fp.write('\n')
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary. Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g.flatten(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line. This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1. The zeroth element of the list should be the Message
        # object for the subpart. Extract that object, stringify it, and
        # write that out.
        g.flatten(msg.get_payload(0), unixfrom=0)
        self._fp.write(s.getvalue())



class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message. If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload. fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def _dispatch(self, msg):
        # Walk every part of msg: text parts are written decoded, multipart
        # containers are skipped, and every other part is replaced by the
        # expanded format string. Each written item ends with a newline.
        for part in msg.walk():
            maintype = part.get_main_type('text')
            if maintype == 'text':
                # str() mirrors the conversion the print statement applied.
                self.write(str(part.get_payload(decode=1)) + '\n')
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                self.write(self._fmt % {
                    'type'       : part.get_type('[no MIME type]'),
                    'maintype'   : part.get_main_type('[no main MIME type]'),
                    'subtype'    : part.get_subtype('[no sub-MIME type]'),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    } + '\n')



Barry Warsawba925802001-09-23 03:17:28 +0000347# Helper
Barry Warsaw409a4c02002-04-10 21:01:31 +0000348def _make_boundary(text=None):
Barry Warsawba925802001-09-23 03:17:28 +0000349 # Craft a random boundary. If text is given, ensure that the chosen
350 # boundary doesn't appear in the text.
351 boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
352 if text is None:
353 return boundary
354 b = boundary
355 counter = 0
356 while 1:
357 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
358 if not cre.search(text):
359 break
360 b = boundary + '.' + str(counter)
361 counter += 1
362 return b