blob: d03b43f58c540ac8fd79fa7ec93f630ad84cd0c1 [file] [log] [blame]
# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Basic message object for the email package object model.
"""
6
7from __future__ import generators
8
9import re
10import base64
11import quopri
12from cStringIO import StringIO
13from types import ListType
14
Barry Warsawba925802001-09-23 03:17:28 +000015# Intrapackage imports
16import Errors
17import Utils
18
# Separator placed between a header's main value and each of its parameters
# when a header value is (re)assembled.
SEMISPACE = '; '
# Splits a header value into its main value and parameters: a semicolon
# followed by any amount of whitespace.
paramre = re.compile(r';\s*')
21
Barry Warsawba925802001-09-23 03:17:28 +000022
Tim Peters527e64f2001-10-04 05:36:56 +000023
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload. If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message. Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers. Not all of
    the mapping methods are implemented.

    Headers are stored as an ordered list of (name, value) 2-tuples. Field
    names are kept exactly as given but are always compared
    case-insensitively.
    """
    def __init__(self):
        # Ordered list of (name, value) tuples; duplicates are allowed.
        self._headers = []
        self._unixfrom = None
        # None (empty), a scalar, or a list of sub-messages for multiparts.
        self._payload = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.
        """
        # Imported here rather than at module level, as in the original
        # layout of this package.
        from Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true (1) if the payload is a list of parts, else 0."""
        if isinstance(self._payload, list):
            return 1
        return 0

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the Unix From_ envelope line to the given value."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the Unix From_ envelope line, or None if never set."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload.

        If the current payload is empty, then the current payload will be made
        a scalar, set to the given value. If the current payload is already
        a list, the given payload is appended to it. Otherwise the existing
        scalar payload and the new one are combined into a two-element list,
        which is only allowed when the message's main content type is
        `multipart' or there is no Content-Type: header at all; in any other
        case Errors.MultipartConversionError is raised.
        """
        if self._payload is None:
            self._payload = payload
        elif isinstance(self._payload, list):
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            # Scalar -> list conversion.
            self._payload = [self._payload, payload]

    # A useful synonym
    attach = add_payload

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload. TypeError is raised
        if i is given but the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'. If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded). If the message is a multipart and the
        decode flag is true, then None is returned (this includes the case
        where an index i was given, since the check is made on this message,
        not on the selected subpart).
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError(i)
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload):
        """Set the payload to the given value."""
        self._payload = payload

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined. Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name. Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, key):
        """Return true if a header with the given field name is present."""
        return key.lower() in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # A fresh list is a unique sentinel, so a header whose stored value
        # happens to be None is still reported as present.
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates. Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates. Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        Returns a new list of (name, value) 2-tuples, so the caller may
        modify it without affecting the message.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates. Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing. The first header whose name matches case-insensitively
        is the one returned.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates. Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add. keyword arguments can be used to
        set additional parameters for the header field, with underscores
        converted to dashes. Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added. If _value is None, the header consists of the parameters
        only.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')

        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append('%s="%s"' % (k.replace('_', '-'), v))
        if _value is not None:
            # The main value always comes first, ahead of any parameters.
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'. If there was no Content-Type:
        header in the message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        # Everything before the first semicolon is the type itself.
        return paramre.split(value)[0].lower()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present.

        This is the part of the content type before the first `/'. failobj
        is returned if there is no Content-Type: header.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        # split() always yields at least one element, so this cannot fail.
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present.

        This is the part of the content type between the first and second
        `/'. failobj is returned if there is no Content-Type: header or the
        content type contains no `/'.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        parts = ctype.split('/')
        if len(parts) > 1:
            return parts[1]
        return failobj

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values. BAW:
        # should this be part of the public interface?
        #
        # Returns failobj if the header is missing, otherwise a list of
        # (name, value) 2-tuples, one per semicolon-separated piece of the
        # header value (the first piece being the main value itself).
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in paramre.split(value):
            try:
                name, val = p.split('=', 1)
            except ValueError:
                # Must have been a bare attribute
                name = p
                val = ''
            params.append((name, val))
        return params

    def get_params(self, failobj=None, header='content-type'):
        """Return the message's Content-Type: parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign. The left hand side of the `=' is the key,
        while the right hand side is the value. If there is no `=' sign in
        the parameter the value is the empty string. The value is always
        unquoted.

        Optional failobj is the object to return if there is no Content-Type:
        header. Optional header is the header to search instead of
        Content-Type:
        """
        missing = []
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        return [(k, Utils.unquote(v)) for k, v in params]

    def get_param(self, param, failobj=None, header='content-type'):
        """Return the parameter value if found in the Content-Type: header.

        Optional failobj is the object to return if there is no Content-Type:
        header, or if the header has no such parameter. Optional header is
        the header to search instead of Content-Type:

        Parameter keys are always compared case insensitively. Values are
        always unquoted.
        """
        # Use a private sentinel so the header is only looked up once.
        missing = []
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        wanted = param.lower()
        for k, v in params:
            if k.lower() == wanted:
                return Utils.unquote(v)
        return failobj

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted. failobj is returned if the
        header or the parameter is missing.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        # get_param() returns the unquoted value; surrounding whitespace is
        # stripped and Utils.unquote() is applied once more.
        return Utils.unquote(filename.strip())

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted. failobj is returned if the header or
        the parameter is missing.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # get_param() returns the unquoted value; surrounding whitespace is
        # stripped and Utils.unquote() is applied once more.
        return Utils.unquote(boundary.strip())

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header(). The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        missing = []
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.
            raise Errors.HeaderParseError('No Content-Type: header found')
        quoted = '"%s"' % boundary
        newparams = []
        foundp = 0
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted))
                foundp = 1
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end. BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted))
        # Reassemble the header value once; bare attributes (empty value)
        # are written without an `=' sign.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        newvalue = SEMISPACE.join(parts)
        # Replace the existing Content-Type: header with the new value,
        # leaving every other header, and the overall order, untouched.
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, newvalue))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def walk(self):
        """Walk over the message tree, yielding each subpart.

        The walk is performed in depth-first order: this message is yielded
        first, then each subpart in turn, each followed immediately by its
        own subparts. This method is a generator.
        """
        yield self
        if self.is_multipart():
            for subpart in self.get_payload():
                for subsubpart in subpart.walk():
                    yield subsubpart

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type: header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part has no
        Content-Type: header or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_param('charset', failobj) for part in self.walk()]