blob: d4c10dff0cb717cb22d8a93d9286416bdf1e74dc [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw08898492003-03-11 04:33:30 +00008import uu
Barry Warsaw21191d32003-03-10 16:13:14 +00009import binascii
Barry Warsaw409a4c02002-04-10 21:01:31 +000010import warnings
Barry Warsawba925802001-09-23 03:17:28 +000011from cStringIO import StringIO
Barry Warsaw908dc4b2002-06-29 05:56:15 +000012from types import ListType, TupleType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000013
Barry Warsawba925802001-09-23 03:17:28 +000014# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000015from email import Utils
Barry Warsaw21191d32003-03-10 16:13:14 +000016from email import Errors
Barry Warsaw8ba76e82002-06-02 19:05:51 +000017from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000018
# Separator placed between a header value and each of its parameters.
SEMISPACE = '; '

# Compatibility shim: bind True and False as integers on interpreters where
# the names are not already defined.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
35
36
37
Barry Warsaw908dc4b2002-06-29 05:56:15 +000038# Helper functions
def _formatparam(param, value=None, quote=True):
    """Build the textual form of a single header parameter.

    Returns the bare parameter name when value is None or empty.  Otherwise
    returns `param=value', with the value wrapped in double quotes when quote
    is true or when the value contains a character matched by tspecials.

    A tuple value is treated as an RFC 2231 (charset, language, value)
    triple: `*' is appended to the parameter name and the value is encoded
    with Utils.encode_rfc2231().
    """
    # Nothing to attach: emit the bare attribute.
    if value is None or len(value) == 0:
        return param
    if isinstance(value, TupleType):
        # Encode as per RFC 2231.  charset is a string, not a Charset
        # instance.
        param += '*'
        value = Utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    must_quote = quote or tspecials.search(value)
    if not must_quote:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
Barry Warsawbeb59452001-09-26 05:41:51 +000060
def _parseparam(s):
    """Split a `;'-introduced parameter string into a list of items.

    s is expected to start with `;' (callers prepend one to the raw header
    value).  Each item is the text up to the next `;' that is not inside a
    double-quoted string.  For items of the form `key=value' the key is
    stripped and lowercased and the value is stripped; other items are
    returned stripped but otherwise unchanged.

    Returns the list of items in the order they appear.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # A `;' preceded by an odd number of quote characters lies inside a
        # quoted string and does not terminate the item.  Backslash-escaped
        # quotes (\") do not open or close a string, so they are left out
        # of the count.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            # No further separator: the item runs to the end of the string.
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist
77
Barry Warsawba925802001-09-23 03:17:28 +000078
def _unquotevalue(value):
    """Return value with Utils.unquote() applied to its text.

    A tuple is treated as a (charset, language, text) triple: the first two
    items are passed through untouched and only the third is unquoted.  Any
    other value is unquoted directly.
    """
    if not isinstance(value, TupleType):
        return Utils.unquote(value)
    charset = value[0]
    language = value[1]
    return charset, language, Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000084
85
Barry Warsaw48b0d362002-08-27 22:34:44 +000086
Barry Warsawba925802001-09-23 03:17:28 +000087class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
def __init__(self):
    """Create an empty message: no headers, payload, envelope or charset."""
    # Ordered list of (name, value) header pairs.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    # Default content type
    self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +0000111
def __str__(self):
    """Return the entire formatted message as a string.

    Delegates to as_string() with unixfrom=True, so the envelope header is
    requested along with the headers and body.
    """
    flattened = self.as_string(unixfrom=True)
    return flattened
Barry Warsawba925802001-09-23 03:17:28 +0000117
def as_string(self, unixfrom=False):
    """Flatten the message into a string and return it.

    Optional `unixfrom' is handed to the Generator's flatten() method; when
    True, it asks for the Unix From_ envelope header to be included.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported inside the method rather than at module level.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
132
def is_multipart(self):
    """Return True if the payload is a list, False otherwise."""
    if not isinstance(self._payload, ListType):
        return False
    return True
Barry Warsawba925802001-09-23 03:17:28 +0000138
139 #
140 # Unix From_ line
141 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
144
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
147
148 #
149 # Payload manipulation.
150 #
def add_payload(self, payload):
    """Add the given payload to the current payload.

    With no current payload, the payload becomes the given scalar.  With a
    list payload, the new item is appended.  With a scalar payload, the
    payload becomes a two-item list of the old and new values, unless the
    main content type is something other than `multipart' or missing, in
    which case Errors.MultipartConversionError is raised.

    Note: This method is deprecated.  Use .attach() instead.
    """
    warnings.warn('add_payload() is deprecated, use attach() instead.',
                  DeprecationWarning, 2)
    current = self._payload
    if current is None:
        self._payload = payload
        return
    if isinstance(current, ListType):
        current.append(payload)
        return
    # A scalar payload can only be promoted to a list for multipart (or
    # untyped) messages.
    if self.get_main_type() not in (None, 'multipart'):
        raise Errors.MultipartConversionError(
            'Message main content type must be "multipart" or missing')
    self._payload = [current, payload]
170
def attach(self, payload):
    """Append the given payload to the current payload.

    If there is no payload yet, a new one-item list is created; otherwise
    the existing payload's append() method is called.  To set the payload
    to a scalar object, use set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
    else:
        self._payload = [payload]
Barry Warsawba925802001-09-23 03:17:28 +0000182
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; a TypeError is raised if i is
    given and the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded according to the Content-Transfer-Encoding header (default is
    False).

    When True and the message is not a multipart, the payload is decoded
    if the header's value is `quoted-printable', `base64', or one of the
    uuencode names.  If some other encoding is used, or the header is
    missing, or the base64 or uuencoded data cannot be decoded, the payload
    is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, ListType):
        payload = self._payload[i]
    else:
        raise TypeError('Expected list, got %s' % type(self._payload))
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        try:
            return Utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        decoded = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), decoded)
        except uu.Error:
            # Some decoding problem
            return payload
        return decoded.getvalue()
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
232
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    If the optional charset is not None it is passed on to set_charset();
    see that method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
242
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  A string is converted to a Charset instance.  If charset is
    None, the charset parameter is removed via del_param() and the stored
    charset is cleared.  Anything else raises a TypeError.

    For a real charset, a MIME-Version header is added if missing; the
    Content-Type header is either created as text/plain with the output
    charset or has its charset parameter set; and, if there is no
    Content-Transfer-Encoding header yet, the charset's body encoding is
    either called with this message (when callable) or added as the header
    value.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    # Strings are accepted and promoted to Charset instances.
    if isinstance(charset, StringType):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if self.has_key('Content-Transfer-Encoding'):
        return
    cte = charset.get_body_encoding()
    if callable(cte):
        # The body encoding is a function that works on the message itself.
        cte(self)
    else:
        self.add_header('Content-Transfer-Encoding', cte)
282
def get_charset(self):
    """Return the charset stored for the message's payload.

    This is whatever set_charset() last stored, or None.
    """
    return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000287
Barry Warsawba925802001-09-23 03:17:28 +0000288 #
289 # MAPPING INTERFACE (partial)
290 #
def __len__(self):
    """Return the number of stored headers, counting duplicates."""
    header_count = len(self._headers)
    return header_count
294
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception.

    When a header appears several times, only one occurrence is returned
    and callers should not rely on which one.  Use get_all() to retrieve
    every value for a field name.
    """
    value = self.get(name)
    return value
305
def __setitem__(self, name, val):
    """Append a (name, val) header to the message.

    Note: an existing header with the same field name is not overwritten.
    Use __delitem__() first to delete any existing headers.
    """
    header = (name, val)
    self._headers.append(header)
313
def __delitem__(self, name):
    """Remove every header whose name matches, ignoring case.

    Nothing happens (and nothing is raised) when no header matches.
    """
    wanted = name.lower()
    # Rebuild the list, keeping only the headers that do not match.
    self._headers = [(k, v) for k, v in self._headers
                     if k.lower() != wanted]
325
def __contains__(self, name):
    """Return true if a header named name exists, ignoring case."""
    wanted = name.lower()
    lowered = [k.lower() for k, v in self._headers]
    return wanted in lowered
Barry Warsawba925802001-09-23 03:17:28 +0000328
def has_key(self, name):
    """Return true if the message contains the header."""
    # A fresh list is a unique sentinel, so a header whose value is None is
    # still reported as present.
    sentinel = []
    found = self.get(name, sentinel)
    return found is not sentinel
Barry Warsawba925802001-09-23 03:17:28 +0000333
def keys(self):
    """Return a new list of every header field name.

    Names come back in stored order (the order in which they appeared in
    the original message or were added) and may contain duplicates.  A
    field that is deleted and re-inserted ends up at the end of the list.
    """
    names = []
    for name, value in self._headers:
        names.append(name)
    return names
343
def values(self):
    """Return a new list of every header value.

    Values come back in stored order (the order in which they appeared in
    the original message or were added) and may contain duplicates.  A
    field that is deleted and re-inserted ends up at the end of the list.
    """
    vals = []
    for name, value in self._headers:
        vals.append(value)
    return vals
353
def items(self):
    """Return a copy of the header list as (name, value) pairs.

    Pairs come back in stored order (the order in which they appeared in
    the original message or were added) and may contain duplicates.  A
    field that is deleted and re-inserted ends up at the end of the list.
    """
    # Hand back a shallow copy so callers cannot mutate the stored list.
    snapshot = list(self._headers)
    return snapshot
363
def get(self, name, failobj=None):
    """Return the value of the first header matching name, ignoring case.

    Like __getitem__() but failobj is returned instead of None when the
    field is missing.
    """
    wanted = name.lower()
    for key, value in self._headers:
        if key.lower() != wanted:
            continue
        return value
    return failobj
375
376 #
377 # Additional useful stuff
378 #
379
def get_all(self, name, failobj=None):
    """Return a list of every value stored for the named field.

    Matching ignores case.  Values come back in stored order and may
    contain duplicates.  A field that is deleted and re-inserted ends up
    at the end of the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    if matches:
        return matches
    return failobj
397
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its primary value (left
    out of the header text when None).  Keyword arguments become
    additional parameters for the header field, with underscores in their
    names converted to dashes.  A parameter whose value is None is added
    as a bare key; any other is formatted with _formatparam().

    The header is appended; existing headers with the same name are kept.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    parts = []
    if _value is not None:
        parts.append(_value)
    for k, v in _params.items():
        key = k.replace('_', '-')
        if v is None:
            parts.append(key)
        else:
            parts.append(_formatparam(key, v))
    self._headers.append((_name, SEMISPACE.join(parts)))
419
def replace_header(self, _name, _value):
    """Replace the value of the first header matching _name.

    Matching ignores case.  The header keeps its position in the list and
    its original spelling; only the value changes.  If no header matches,
    a KeyError carrying the lowercased name is raised.
    """
    wanted = _name.lower()
    for i in range(len(self._headers)):
        k, v = self._headers[i]
        if k.lower() == wanted:
            self._headers[i] = (k, _value)
            return
    raise KeyError(wanted)
434
Barry Warsawc1068642002-07-19 22:24:55 +0000435 #
436 # These methods are silently deprecated in favor of get_content_type() and
437 # friends (see below). They will be noisily deprecated in email 3.0.
438 #
439
def get_type(self, failobj=None):
    """Return the message's content type.

    The result is the part of the Content-Type header before the first
    parameter separator, lowercased and stripped, normally of the form
    `maintype/subtype'.  If there is no Content-Type header in the message,
    failobj is returned (defaults to None).
    """
    sentinel = []
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        return failobj
    first = paramre.split(raw)[0]
    return first.lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000452
def get_main_type(self, failobj=None):
    """Return the main part of the content type, or failobj.

    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one `/'.
    """
    sentinel = []
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000462
def get_subtype(self, failobj=None):
    """Return the subtype part of the content type, or failobj.

    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one `/'.
    """
    sentinel = []
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[1]
472
473 #
474 # Use these three methods instead of the three above.
475 #
476
def get_content_type(self):
    """Return the message's content type.

    The returned string is lowercased and of the form `maintype/subtype'.
    If there is no Content-Type header in the message, the default type as
    given by get_default_type() is returned.  If the header value does not
    contain exactly one `/', `text/plain' is returned.  A value is
    therefore always returned.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = []
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(raw)[0].lower().strip()
    if ctype.count('/') == 1:
        return ctype
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    return 'text/plain'
Barry Warsawc1068642002-07-19 22:24:55 +0000500
def get_content_maintype(self):
    """Return the message's main content type.

    This is the text before the `/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
509
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the text after the `/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
Barry Warsawba925802001-09-23 03:17:28 +0000518
def get_default_type(self):
    """Return the stored `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
527
def set_default_type(self, ctype):
    """Store ctype as the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  Only the internal default is changed; no header is
    touched.
    """
    self._default_type = ctype
536
def _get_params_preserve(self, failobj, header):
    """Return the header's items as (name, value) pairs, quoting intact.

    Like get_params() but preserves the quoting of values.  Returns failobj
    when the header is missing.  Items without an `=' get an empty string
    as their value.  The pairs are passed through Utils.decode_params()
    before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = []
    value = self.get(header, sentinel)
    if value is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(';' + value):
        if '=' in item:
            name, val = item.split('=', 1)
            name = name.strip()
            val = val.strip()
        else:
            # Must have been a bare attribute
            name = item.strip()
            val = ''
        pairs.append((name, val))
    return Utils.decode_params(pairs)
557
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if the header is missing.
    Optional header is the header to search instead of Content-Type.  If
    unquote is True, each value is unquoted.
    """
    sentinel = []
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(k, _unquotevalue(v)) for k, v in params]
Barry Warsawba925802001-09-23 03:17:28 +0000579
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if the header is missing or
    has no such parameter.  Optional header is the header to search instead
    of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return
    values, and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if not self.has_key(header):
        return failobj
    for k, v in self._get_params_preserve(failobj, header):
        if k.lower() != param.lower():
            continue
        # First match wins.
        if unquote:
            return _unquotevalue(v)
        return v
    return failobj
615
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.
    """
    if charset and not isinstance(value, TupleType):
        value = (charset, language, value)

    if header.lower() == 'content-type' and not self.has_key(header):
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # NOTE: a parameter that is present but has an empty value is falsy
    # here and therefore takes the "append" branch.
    if not self.get_param(param, header=header):
        new_item = _formatparam(param, value, requote)
        if ctype:
            ctype = SEMISPACE.join([ctype, new_item])
        else:
            ctype = new_item
    else:
        # Rebuild the header item by item, swapping in the new value.
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_param, old_value, requote)
            if ctype:
                ctype = SEMISPACE.join([ctype, piece])
            else:
                ctype = piece
    # Only rewrite the header when its text actually changed.
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
663
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt without the parameter or its value and, if the
    resulting text differs, the old header is deleted and the new one
    appended.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens if the header is missing.
    """
    if not self.has_key(header):
        return
    new_ctype = ''
    # header must be passed by keyword: the first positional argument of
    # get_params() is failobj, not the header name.
    for p, v in self.get_params(header=header, unquote=requote):
        if p.lower() != param.lower():
            piece = _formatparam(p, v, requote)
            if not new_ctype:
                new_ctype = piece
            else:
                new_ctype = SEMISPACE.join([new_ctype, piece])
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
685
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if not self.has_key(header):
        # Nothing to preserve; just add the header
        self[header] = type
        return
    # header must be passed by keyword: get_params() takes failobj as its
    # first positional argument, so a positional header would be ignored
    # and the parameters of the default Content-Type header would be
    # carried over instead of those of the header being replaced.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
717
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The value comes from the `filename' parameter of the
    Content-Disposition header and is returned unquoted.  failobj is
    returned when get_param() reports the parameter as missing.
    """
    # A fresh list is a unique marker: it lets us tell "no such parameter"
    # apart from any real value, including empty ones.
    sentinel = []
    value = self.get_param('filename', sentinel, 'content-disposition')
    if value is sentinel:
        return failobj
    if not isinstance(value, TupleType):
        # Plain string parameter
        return _unquotevalue(value.strip())
    # RFC 2231 encoded parameter: element 0 holds the charset and element 2
    # the text.  Fall back to us-ascii when no charset was supplied.
    unquoted = _unquotevalue(value)
    return unicode(unquoted[2], unquoted[0] or 'us-ascii')
Barry Warsawba925802001-09-23 03:17:28 +0000735
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The value comes from the `boundary' parameter of the Content-Type
    header and is returned unquoted.  failobj is returned when get_param()
    reports the parameter as missing.
    """
    # A fresh list is a unique marker for "no such parameter"
    sentinel = []
    value = self.get_param('boundary', sentinel)
    if value is sentinel:
        return failobj
    if not isinstance(value, TupleType):
        return _unquotevalue(value.strip())
    # RFC 2231 encoded: decode with the given charset (us-ascii when none
    # was supplied).  The result has to be representable in ascii.
    codec = value[0] or 'us-ascii'
    decoded = unicode(value[2], codec)
    return decoded.encode('us-ascii')
Barry Warsawba925802001-09-23 03:17:28 +0000751
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and adding a fresh one via
    add_header(), this keeps the Content-Type header at its original
    position among the message's headers.  The boundary value is always
    written inside double quotes.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    # A fresh list is a unique marker for "no such header"
    sentinel = []
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise Errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    # Render every parameter back to text, substituting the new boundary
    # for any existing one.  A parameter with an empty value (such as the
    # leading type/subtype item) is written as the bare name.
    parts = []
    replaced = False
    for name, val in old_params:
        if name.lower() == 'boundary':
            name, val = 'boundary', quoted
            replaced = True
        if val == '':
            parts.append(name)
        else:
            parts.append('%s=%s' % (name, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        parts.append('boundary=%s' % quoted)
    new_value = SEMISPACE.join(parts)
    # Rebuild the header list, swapping in the new Content-Type value while
    # leaving every other header, and the overall order, untouched.
    rewritten = []
    for h, v in self._headers:
        if h.lower() == 'content-type':
            v = new_value
        rewritten.append((h, v))
    self._headers = rewritten
796
# Pull in the walk() implementation so it is available as self.walk().
# email._compat22 is tried first; if importing it raises SyntaxError the
# interpreter is taken to be Python 2.1 (presumably that module uses syntax
# 2.1 cannot parse -- confirm against _compat22) and the email._compat21
# version is used instead.
try:
    from email._compat22 import walk
except SyntaxError:
    # Must be using Python 2.1
    from email._compat21 import walk
Barry Warsawba925802001-09-23 03:17:28 +0000802
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The result is always lower case.  failobj is returned when get_param()
    reports the charset parameter as missing.
    """
    # A fresh list is a unique marker for "no such parameter"
    sentinel = []
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, TupleType):
        # RFC 2231 encoded: decode with the given charset (us-ascii when
        # none was supplied).  The result has to be representable in ascii.
        codec = value[0] or 'us-ascii'
        value = unicode(value[2], codec).encode('us-ascii')
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000820
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The list holds one entry for every part produced by self.walk(), in
    the order walk() yields them.  Each entry is whatever that part's
    get_content_charset(failobj) returns: the part's charset, or failobj
    (None by default) when it has none.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
Barry Warsaw15aefa92002-09-26 17:19:34 +0000837 return [part.get_content_charset(failobj) for part in self.walk()]