blob: 91931a11e2dae6d3326e9b6dea96d321fbd54c7f [file] [log] [blame]
# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Basic message object for the email package object model.
"""
6
7from __future__ import generators
8
9import re
10import base64
11import quopri
12from cStringIO import StringIO
13from types import ListType
14
Barry Warsawba925802001-09-23 03:17:28 +000015# Intrapackage imports
16import Errors
17import Utils
18
# Separator used when joining a header's main value and its parameters.
SEMISPACE = '; '
# Splits a header value into its main value and parameters: a semicolon with
# any surrounding whitespace.  Note that it does not know about quoting.
paramre = re.compile(r'\s*;\s*')
Barry Warsawbeb59452001-09-26 05:41:51 +000021
Barry Warsawba925802001-09-23 03:17:28 +000022
Barry Warsawe968ead2001-10-04 17:05:11 +000023
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Header field names are always compared case insensitively, but they are
    stored (and returned) with the case they were added with.
    """
    def __init__(self):
        # List of (name, value) 2-tuples, in insertion order.
        self._headers = []
        self._unixfrom = None
        # None (empty), a scalar, or a list of sub-messages.
        self._payload = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.
        """
        # Imported here rather than at module level, as in the original code.
        from Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true if the message consists of multiple parts.

        A message is multipart exactly when its payload is a list.
        """
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the Unix From_ envelope line to `unixfrom'."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the Unix From_ envelope line, or None if it was never set."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload.

        If the current payload is empty, then the current payload will be made
        a scalar, set to the given value.  If it is already a list, the given
        payload is appended to it.  Otherwise the existing scalar payload and
        the new one are combined into a two element list.

        MultipartConversionError is raised if a scalar payload would have to
        be converted to a list, but the message's main content type is
        present and is not "multipart".
        """
        if self._payload is None:
            self._payload = payload
        elif isinstance(self._payload, list):
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            self._payload = [self._payload, payload]

    # A useful synonym
    attach = add_payload

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload.  TypeError is raised
        if i is given but the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded).  If the message is a multipart and the
        decode flag is true, then None is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload):
        """Set the payload to the given value."""
        self._payload = payload

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, key):
        """Return true if the message contains a header named `key'."""
        name = key.lower()
        return name in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # A fresh list is a unique sentinel, so a header whose value happens
        # to be None is still reported as present.
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        The returned list is a copy; modifying it does not affect the
        message.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value is its main value (it is
        left out of the header if it is None).  Keyword arguments can be used
        to set additional parameters for the header field, with underscores
        converted to dashes.  Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')

        """
        parts = []
        for k, v in _params.items():
            key = k.replace('_', '-')
            if v is None:
                parts.append(key)
            else:
                parts.append('%s="%s"' % (key, v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is the Content-Type: header's value up to the
        first parameter, coerced to lowercase, normally of the form
        `maintype/subtype'.  If there was no Content-Type: header in the
        message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        return paramre.split(value)[0].lower()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present.

        failobj is returned if there is no Content-Type: header.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        # split() always yields at least one element, so this cannot fail.
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present.

        failobj is returned if there is no Content-Type: header, or if its
        value has no `/' in it.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        parts = ctype.split('/')
        if len(parts) > 1:
            return parts[1]
        return failobj

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        #
        # The first element of the result is the header's main value (the
        # text before the first separator), paired with '' when it has no
        # `=' in it.  Names and values are not stripped or unquoted here.
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in paramre.split(value):
            try:
                name, val = p.split('=', 1)
            except ValueError:
                # Must have been a bare attribute
                name = p
                val = ''
            params.append((name, val))
        return params

    def get_params(self, failobj=None, header='content-type'):
        """Return the message's Content-Type: parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is always
        unquoted.

        Optional failobj is the object to return if there is no Content-Type:
        header.  Optional header is the header to search instead of
        Content-Type:
        """
        missing = []
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        return [(k, Utils.unquote(v)) for k, v in params]

    def get_param(self, param, failobj=None, header='content-type'):
        """Return the parameter value if found in the Content-Type: header.

        Optional failobj is the object to return if there is no Content-Type:
        header, or if the header has no such parameter.  Optional header is
        the header to search instead of Content-Type:

        Parameter keys are always compared case insensitively.  Values are
        always unquoted.
        """
        if not self.has_key(header):
            return failobj
        wanted = param.lower()
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == wanted:
                return Utils.unquote(v)
        return failobj

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        return Utils.unquote(filename.strip())

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        return Utils.unquote(boundary.strip())

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        missing = []
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.
            raise Errors.HeaderParseError('No Content-Type: header found')
        quoted = '"%s"' % boundary
        newparams = []
        foundp = 0
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted))
                foundp = 1
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted))
        # Render the new header value once; values keep whatever quoting they
        # had, and bare attributes are written without an `=' sign.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        newvalue = SEMISPACE.join(parts)
        # Replace the existing Content-Type: header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, newvalue))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def walk(self):
        """Walk over the message tree, yielding each subpart.

        The walk is performed in depth-first order, starting with this
        message itself.  This method is a generator.
        """
        yield self
        if self.is_multipart():
            for subpart in self.get_payload():
                for subsubpart in subpart.walk():
                    yield subsubpart

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type: header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part has no
        Content-Type: header or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_param('charset', failobj) for part in self.walk()]