blob: 62b82b79c100fa91100024ab34966d0087eaf5ad [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from io import BytesIO, StringIO
14
15# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from email import utils
17from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040018from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000019from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040020from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000021Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Pattern matching the `special' characters whose presence in a parameter
# value forces that value to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029
def _splitparam(param):
    """Split a header value at its first semicolon.

    Return a 2-tuple (main_value, rest), both stripped of surrounding
    whitespace.  rest is None when the value contains no semicolon.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    # RDM: we might have a Header here; for now just stringify it.
    head, semicolon, tail = str(param).partition(';')
    head = head.strip()
    if semicolon:
        return head, tail.strip()
    return head, None
39
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    A missing or empty value yields just the bare parameter name.

    This will quote the value if needed or if quote is true.  If value is a
    three tuple (charset, language, value), it will be encoded according
    to RFC2231 rules.  If it contains non-ascii characters it will likewise
    be encoded according to RFC2231 rules, using the utf-8 charset and
    a null language.  RFC 2231 encoded values are never quoted.
    """
    if value is None or len(value) == 0:
        return param

    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)

    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        # Not representable in ASCII: fall back to RFC 2231 with utf-8.
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)

    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    needs_quoting = quote or tspecials.search(value)
    if not needs_quoting:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
73
def _parseparam(s):
    """Split a header value into its semicolon-separated pieces.

    Semicolons inside a double-quoted section do not split.  For each
    piece containing '=', the text before the first '=' is stripped and
    lower-cased and the text after it is stripped.  Returns the list of
    pieces.
    """
    # RDM This might be a Header, so for now stringify it.
    rest = ';' + str(s)
    pieces = []
    while rest.startswith(';'):
        rest = rest[1:]
        cut = rest.find(';')
        # An odd number of unescaped double quotes before the candidate
        # semicolon means it sits inside a quoted string; try the next one.
        while cut > 0 and (rest.count('"', 0, cut)
                           - rest.count('\\"', 0, cut)) % 2:
            cut = rest.find(';', cut + 1)
        if cut < 0:
            cut = len(rest)
        piece = rest[:cut]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        rest = rest[cut:]
    return pieces
92
93
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # Only the third item (the text) of a 3-tuple is unquoted.
    return value[0], value[1], utils.unquote(value[2])
103
104
105
106class Message:
107 """Basic message object.
108
109 A message object is defined as something that has a bunch of RFC 2822
110 headers and a payload. It may optionally have an envelope header
111 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
112 multipart or a message/rfc822), then the payload is a list of Message
113 objects, otherwise it is a string.
114
115 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000116 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000117 do in fact appear multiple times (e.g. Received) and for those headers,
118 you must use the explicit API to set or get all the headers. Not all of
119 the mapping methods are implemented.
120 """
R David Murrayc27e5222012-05-25 15:01:48 -0400121 def __init__(self, policy=compat32):
122 self.policy = policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000123 self._headers = []
124 self._unixfrom = None
125 self._payload = None
126 self._charset = None
127 # Defaults for multipart messages
128 self.preamble = self.epilogue = None
129 self.defects = []
130 # Default content type
131 self._default_type = 'text/plain'
132
133 def __str__(self):
134 """Return the entire formatted message as a string.
135 This includes the headers, body, and envelope header.
136 """
137 return self.as_string()
138
139 def as_string(self, unixfrom=False, maxheaderlen=0):
140 """Return the entire formatted message as a string.
141 Optional `unixfrom' when True, means include the Unix From_ envelope
142 header.
143
144 This is a convenience method and may not generate the message exactly
R David Murray7dedcb42011-03-15 14:01:18 -0400145 as you intend. For more flexibility, use the flatten() method of a
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000146 Generator instance.
147 """
148 from email.generator import Generator
149 fp = StringIO()
150 g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
151 g.flatten(self, unixfrom=unixfrom)
152 return fp.getvalue()
153
154 def is_multipart(self):
155 """Return True if the message consists of multiple parts."""
156 return isinstance(self._payload, list)
157
158 #
159 # Unix From_ line
160 #
161 def set_unixfrom(self, unixfrom):
162 self._unixfrom = unixfrom
163
164 def get_unixfrom(self):
165 return self._unixfrom
166
167 #
168 # Payload manipulation.
169 #
170 def attach(self, payload):
171 """Add the given payload to the current payload.
172
173 The current payload will always be a list of objects after this method
174 is called. If you want to set the payload to a scalar object, use
175 set_payload() instead.
176 """
177 if self._payload is None:
178 self._payload = [payload]
179 else:
180 self._payload.append(payload)
181
182 def get_payload(self, i=None, decode=False):
183 """Return a reference to the payload.
184
185 The payload will either be a list object or a string. If you mutate
186 the list object, you modify the message's payload in place. Optional
187 i returns that index into the payload.
188
189 Optional decode is a flag indicating whether the payload should be
190 decoded or not, according to the Content-Transfer-Encoding header
191 (default is False).
192
193 When True and the message is not a multipart, the payload will be
194 decoded if this header's value is `quoted-printable' or `base64'. If
195 some other encoding is used, or the header is missing, or if the
196 payload has bogus data (i.e. bogus base64 or uuencoded data), the
197 payload is returned as-is.
198
199 If the message is a multipart and the decode flag is True, then None
200 is returned.
201 """
R. David Murray96fd54e2010-10-08 15:55:28 +0000202 # Here is the logic table for this code, based on the email5.0.0 code:
203 # i decode is_multipart result
204 # ------ ------ ------------ ------------------------------
205 # None True True None
206 # i True True None
207 # None False True _payload (a list)
208 # i False True _payload element i (a Message)
209 # i False False error (not a list)
210 # i True False error (not a list)
211 # None False False _payload
212 # None True False _payload decoded (bytes)
213 # Note that Barry planned to factor out the 'decode' case, but that
214 # isn't so easy now that we handle the 8 bit data, which needs to be
215 # converted in both the decode and non-decode path.
216 if self.is_multipart():
217 if decode:
218 return None
219 if i is None:
220 return self._payload
221 else:
222 return self._payload[i]
223 # For backward compatibility, Use isinstance and this error message
224 # instead of the more logical is_multipart test.
225 if i is not None and not isinstance(self._payload, list):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000226 raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray96fd54e2010-10-08 15:55:28 +0000227 payload = self._payload
R David Murraya2150232011-03-16 21:11:23 -0400228 # cte might be a Header, so for now stringify it.
229 cte = str(self.get('content-transfer-encoding', '')).lower()
R David Murray106f8e32011-03-15 12:48:41 -0400230 # payload may be bytes here.
R. David Murray96fd54e2010-10-08 15:55:28 +0000231 if isinstance(payload, str):
R David Murrayc27e5222012-05-25 15:01:48 -0400232 if utils._has_surrogates(payload):
R. David Murray96fd54e2010-10-08 15:55:28 +0000233 bpayload = payload.encode('ascii', 'surrogateescape')
234 if not decode:
235 try:
236 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
237 except LookupError:
238 payload = bpayload.decode('ascii', 'replace')
239 elif decode:
240 try:
241 bpayload = payload.encode('ascii')
242 except UnicodeError:
243 # This won't happen for RFC compliant messages (messages
244 # containing only ASCII codepoints in the unicode input).
245 # If it does happen, turn the string into bytes in a way
246 # guaranteed not to fail.
247 bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000248 if not decode:
249 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000250 if cte == 'quoted-printable':
R. David Murray96fd54e2010-10-08 15:55:28 +0000251 return utils._qdecode(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000252 elif cte == 'base64':
R David Murray80e0aee2012-05-27 21:23:34 -0400253 # XXX: this is a bit of a hack; decode_b should probably be factored
254 # out somewhere, but I haven't figured out where yet.
255 value, defects = decode_b(b''.join(bpayload.splitlines()))
256 for defect in defects:
257 self.policy.handle_defect(self, defect)
258 return value
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000259 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray96fd54e2010-10-08 15:55:28 +0000260 in_file = BytesIO(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000261 out_file = BytesIO()
262 try:
263 uu.decode(in_file, out_file, quiet=True)
264 return out_file.getvalue()
265 except uu.Error:
266 # Some decoding problem
R. David Murray96fd54e2010-10-08 15:55:28 +0000267 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000268 if isinstance(payload, str):
R. David Murray96fd54e2010-10-08 15:55:28 +0000269 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000270 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000271
272 def set_payload(self, payload, charset=None):
273 """Set the payload to the given value.
274
275 Optional charset sets the message's default character set. See
276 set_charset() for details.
277 """
278 self._payload = payload
279 if charset is not None:
280 self.set_charset(charset)
281
282 def set_charset(self, charset):
283 """Set the charset of the payload to a given character set.
284
285 charset can be a Charset instance, a string naming a character set, or
286 None. If it is a string it will be converted to a Charset instance.
287 If charset is None, the charset parameter will be removed from the
288 Content-Type field. Anything else will generate a TypeError.
289
290 The message will be assumed to be of type text/* encoded with
291 charset.input_charset. It will be converted to charset.output_charset
292 and encoded properly, if needed, when generating the plain text
293 representation of the message. MIME headers (MIME-Version,
294 Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000295 """
296 if charset is None:
297 self.del_param('charset')
298 self._charset = None
299 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000300 if not isinstance(charset, Charset):
301 charset = Charset(charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000302 self._charset = charset
303 if 'MIME-Version' not in self:
304 self.add_header('MIME-Version', '1.0')
305 if 'Content-Type' not in self:
306 self.add_header('Content-Type', 'text/plain',
307 charset=charset.get_output_charset())
308 else:
309 self.set_param('charset', charset.get_output_charset())
Guido van Rossum9604e662007-08-30 03:46:43 +0000310 if charset != charset.get_output_charset():
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000311 self._payload = charset.body_encode(self._payload)
312 if 'Content-Transfer-Encoding' not in self:
313 cte = charset.get_body_encoding()
314 try:
315 cte(self)
316 except TypeError:
317 self._payload = charset.body_encode(self._payload)
318 self.add_header('Content-Transfer-Encoding', cte)
319
320 def get_charset(self):
321 """Return the Charset instance associated with the message's payload.
322 """
323 return self._charset
324
325 #
326 # MAPPING INTERFACE (partial)
327 #
328 def __len__(self):
329 """Return the total number of headers, including duplicates."""
330 return len(self._headers)
331
332 def __getitem__(self, name):
333 """Get a header value.
334
335 Return None if the header is missing instead of raising an exception.
336
337 Note that if the header appeared multiple times, exactly which
R. David Murrayd2c310f2010-10-01 02:08:02 +0000338 occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000339 the values matching a header field name.
340 """
341 return self.get(name)
342
343 def __setitem__(self, name, val):
344 """Set the value of a header.
345
346 Note: this does not overwrite an existing header with the same field
347 name. Use __delitem__() first to delete any existing headers.
348 """
R David Murrayc27e5222012-05-25 15:01:48 -0400349 self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000350
351 def __delitem__(self, name):
352 """Delete all occurrences of a header, if present.
353
354 Does not raise an exception if the header is missing.
355 """
356 name = name.lower()
357 newheaders = []
358 for k, v in self._headers:
359 if k.lower() != name:
360 newheaders.append((k, v))
361 self._headers = newheaders
362
363 def __contains__(self, name):
364 return name.lower() in [k.lower() for k, v in self._headers]
365
366 def __iter__(self):
367 for field, value in self._headers:
368 yield field
369
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000370 def keys(self):
371 """Return a list of all the message's header field names.
372
373 These will be sorted in the order they appeared in the original
374 message, or were added to the message, and may contain duplicates.
375 Any fields deleted and re-inserted are always appended to the header
376 list.
377 """
378 return [k for k, v in self._headers]
379
380 def values(self):
381 """Return a list of all the message's header values.
382
383 These will be sorted in the order they appeared in the original
384 message, or were added to the message, and may contain duplicates.
385 Any fields deleted and re-inserted are always appended to the header
386 list.
387 """
R David Murrayc27e5222012-05-25 15:01:48 -0400388 return [self.policy.header_fetch_parse(k, v)
389 for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000390
391 def items(self):
392 """Get all the message's header fields and values.
393
394 These will be sorted in the order they appeared in the original
395 message, or were added to the message, and may contain duplicates.
396 Any fields deleted and re-inserted are always appended to the header
397 list.
398 """
R David Murrayc27e5222012-05-25 15:01:48 -0400399 return [(k, self.policy.header_fetch_parse(k, v))
400 for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000401
402 def get(self, name, failobj=None):
403 """Get a header value.
404
405 Like __getitem__() but return failobj instead of None when the field
406 is missing.
407 """
408 name = name.lower()
409 for k, v in self._headers:
410 if k.lower() == name:
R David Murrayc27e5222012-05-25 15:01:48 -0400411 return self.policy.header_fetch_parse(k, v)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000412 return failobj
413
414 #
R David Murrayc27e5222012-05-25 15:01:48 -0400415 # "Internal" methods (public API, but only intended for use by a parser
416 # or generator, not normal application code.
417 #
418
419 def set_raw(self, name, value):
420 """Store name and value in the model without modification.
421
422 This is an "internal" API, intended only for use by a parser.
423 """
424 self._headers.append((name, value))
425
426 def raw_items(self):
427 """Return the (name, value) header pairs without modification.
428
429 This is an "internal" API, intended only for use by a generator.
430 """
431 return iter(self._headers.copy())
432
433 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000434 # Additional useful stuff
435 #
436
437 def get_all(self, name, failobj=None):
438 """Return a list of all the values for the named field.
439
440 These will be sorted in the order they appeared in the original
441 message, and may contain duplicates. Any fields deleted and
442 re-inserted are always appended to the header list.
443
444 If no such fields exist, failobj is returned (defaults to None).
445 """
446 values = []
447 name = name.lower()
448 for k, v in self._headers:
449 if k.lower() == name:
R David Murrayc27e5222012-05-25 15:01:48 -0400450 values.append(self.policy.header_fetch_parse(k, v))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000451 if not values:
452 return failobj
453 return values
454
455 def add_header(self, _name, _value, **_params):
456 """Extended header setting.
457
458 name is the header field to add. keyword arguments can be used to set
459 additional parameters for the header field, with underscores converted
460 to dashes. Normally the parameter will be added as key="value" unless
R. David Murray7ec754b2010-12-13 23:51:19 +0000461 value is None, in which case only the key will be added. If a
462 parameter value contains non-ASCII characters it can be specified as a
463 three-tuple of (charset, language, value), in which case it will be
464 encoded according to RFC2231 rules. Otherwise it will be encoded using
465 the utf-8 charset and a language of ''.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000466
R. David Murray7ec754b2010-12-13 23:51:19 +0000467 Examples:
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000468
469 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
R. David Murray7ec754b2010-12-13 23:51:19 +0000470 msg.add_header('content-disposition', 'attachment',
471 filename=('utf-8', '', Fußballer.ppt'))
472 msg.add_header('content-disposition', 'attachment',
473 filename='Fußballer.ppt'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000474 """
475 parts = []
476 for k, v in _params.items():
477 if v is None:
478 parts.append(k.replace('_', '-'))
479 else:
480 parts.append(_formatparam(k.replace('_', '-'), v))
481 if _value is not None:
482 parts.insert(0, _value)
R David Murrayc27e5222012-05-25 15:01:48 -0400483 self[_name] = SEMISPACE.join(parts)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000484
485 def replace_header(self, _name, _value):
486 """Replace a header.
487
488 Replace the first matching header found in the message, retaining
489 header order and case. If no matching header was found, a KeyError is
490 raised.
491 """
492 _name = _name.lower()
493 for i, (k, v) in zip(range(len(self._headers)), self._headers):
494 if k.lower() == _name:
R David Murrayc27e5222012-05-25 15:01:48 -0400495 self._headers[i] = self.policy.header_store_parse(k, _value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000496 break
497 else:
498 raise KeyError(_name)
499
500 #
501 # Use these three methods instead of the three above.
502 #
503
504 def get_content_type(self):
505 """Return the message's content type.
506
507 The returned string is coerced to lower case of the form
508 `maintype/subtype'. If there was no Content-Type header in the
509 message, the default type as given by get_default_type() will be
510 returned. Since according to RFC 2045, messages always have a default
511 type this will always return a value.
512
513 RFC 2045 defines a message's default type to be text/plain unless it
514 appears inside a multipart/digest container, in which case it would be
515 message/rfc822.
516 """
517 missing = object()
518 value = self.get('content-type', missing)
519 if value is missing:
520 # This should have no parameters
521 return self.get_default_type()
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000522 ctype = _splitparam(value)[0].lower()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000523 # RFC 2045, section 5.2 says if its invalid, use text/plain
524 if ctype.count('/') != 1:
525 return 'text/plain'
526 return ctype
527
528 def get_content_maintype(self):
529 """Return the message's main content type.
530
531 This is the `maintype' part of the string returned by
532 get_content_type().
533 """
534 ctype = self.get_content_type()
535 return ctype.split('/')[0]
536
537 def get_content_subtype(self):
538 """Returns the message's sub-content type.
539
540 This is the `subtype' part of the string returned by
541 get_content_type().
542 """
543 ctype = self.get_content_type()
544 return ctype.split('/')[1]
545
546 def get_default_type(self):
547 """Return the `default' content type.
548
549 Most messages have a default content type of text/plain, except for
550 messages that are subparts of multipart/digest containers. Such
551 subparts have a default content type of message/rfc822.
552 """
553 return self._default_type
554
555 def set_default_type(self, ctype):
556 """Set the `default' content type.
557
558 ctype should be either "text/plain" or "message/rfc822", although this
559 is not enforced. The default content type is not stored in the
560 Content-Type header.
561 """
562 self._default_type = ctype
563
564 def _get_params_preserve(self, failobj, header):
565 # Like get_params() but preserves the quoting of values. BAW:
566 # should this be part of the public interface?
567 missing = object()
568 value = self.get(header, missing)
569 if value is missing:
570 return failobj
571 params = []
R David Murraya2150232011-03-16 21:11:23 -0400572 for p in _parseparam(value):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000573 try:
574 name, val = p.split('=', 1)
575 name = name.strip()
576 val = val.strip()
577 except ValueError:
578 # Must have been a bare attribute
579 name = p.strip()
580 val = ''
581 params.append((name, val))
582 params = utils.decode_params(params)
583 return params
584
585 def get_params(self, failobj=None, header='content-type', unquote=True):
586 """Return the message's Content-Type parameters, as a list.
587
588 The elements of the returned list are 2-tuples of key/value pairs, as
589 split on the `=' sign. The left hand side of the `=' is the key,
590 while the right hand side is the value. If there is no `=' sign in
591 the parameter the value is the empty string. The value is as
592 described in the get_param() method.
593
594 Optional failobj is the object to return if there is no Content-Type
595 header. Optional header is the header to search instead of
596 Content-Type. If unquote is True, the value is unquoted.
597 """
598 missing = object()
599 params = self._get_params_preserve(missing, header)
600 if params is missing:
601 return failobj
602 if unquote:
603 return [(k, _unquotevalue(v)) for k, v in params]
604 else:
605 return params
606
607 def get_param(self, param, failobj=None, header='content-type',
608 unquote=True):
609 """Return the parameter value if found in the Content-Type header.
610
611 Optional failobj is the object to return if there is no Content-Type
612 header, or the Content-Type header has no such parameter. Optional
613 header is the header to search instead of Content-Type.
614
615 Parameter keys are always compared case insensitively. The return
616 value can either be a string, or a 3-tuple if the parameter was RFC
617 2231 encoded. When it's a 3-tuple, the elements of the value are of
618 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
619 LANGUAGE can be None, in which case you should consider VALUE to be
620 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
621
622 Your application should be prepared to deal with 3-tuple return
623 values, and can convert the parameter to a Unicode string like so:
624
625 param = msg.get_param('foo')
626 if isinstance(param, tuple):
627 param = unicode(param[2], param[0] or 'us-ascii')
628
629 In any case, the parameter value (either the returned string, or the
630 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
631 to False.
632 """
633 if header not in self:
634 return failobj
635 for k, v in self._get_params_preserve(failobj, header):
636 if k.lower() == param.lower():
637 if unquote:
638 return _unquotevalue(v)
639 else:
640 return v
641 return failobj
642
643 def set_param(self, param, value, header='Content-Type', requote=True,
644 charset=None, language=''):
645 """Set a parameter in the Content-Type header.
646
647 If the parameter already exists in the header, its value will be
648 replaced with the new value.
649
650 If header is Content-Type and has not yet been defined for this
651 message, it will be set to "text/plain" and the new parameter and
652 value will be appended as per RFC 2045.
653
654 An alternate header can specified in the header argument, and all
655 parameters will be quoted as necessary unless requote is False.
656
657 If charset is specified, the parameter will be encoded according to RFC
658 2231. Optional language specifies the RFC 2231 language, defaulting
659 to the empty string. Both charset and language should be strings.
660 """
661 if not isinstance(value, tuple) and charset:
662 value = (charset, language, value)
663
664 if header not in self and header.lower() == 'content-type':
665 ctype = 'text/plain'
666 else:
667 ctype = self.get(header)
668 if not self.get_param(param, header=header):
669 if not ctype:
670 ctype = _formatparam(param, value, requote)
671 else:
672 ctype = SEMISPACE.join(
673 [ctype, _formatparam(param, value, requote)])
674 else:
675 ctype = ''
676 for old_param, old_value in self.get_params(header=header,
677 unquote=requote):
678 append_param = ''
679 if old_param.lower() == param.lower():
680 append_param = _formatparam(param, value, requote)
681 else:
682 append_param = _formatparam(old_param, old_value, requote)
683 if not ctype:
684 ctype = append_param
685 else:
686 ctype = SEMISPACE.join([ctype, append_param])
687 if ctype != self.get(header):
688 del self[header]
689 self[header] = ctype
690
691 def del_param(self, param, header='content-type', requote=True):
692 """Remove the given parameter completely from the Content-Type header.
693
694 The header will be re-written in place without the parameter or its
695 value. All values will be quoted as necessary unless requote is
696 False. Optional header specifies an alternative to the Content-Type
697 header.
698 """
699 if header not in self:
700 return
701 new_ctype = ''
702 for p, v in self.get_params(header=header, unquote=requote):
703 if p.lower() != param.lower():
704 if not new_ctype:
705 new_ctype = _formatparam(p, v, requote)
706 else:
707 new_ctype = SEMISPACE.join([new_ctype,
708 _formatparam(p, v, requote)])
709 if new_ctype != self.get(header):
710 del self[header]
711 self[header] = new_ctype
712
713 def set_type(self, type, header='Content-Type', requote=True):
714 """Set the main type and subtype for the Content-Type header.
715
716 type must be a string in the form "maintype/subtype", otherwise a
717 ValueError is raised.
718
719 This method replaces the Content-Type header, keeping all the
720 parameters in place. If requote is False, this leaves the existing
721 header's quoting as is. Otherwise, the parameters will be quoted (the
722 default).
723
724 An alternative header can be specified in the header argument. When
725 the Content-Type header is set, we'll always also add a MIME-Version
726 header.
727 """
728 # BAW: should we be strict?
729 if not type.count('/') == 1:
730 raise ValueError
731 # Set the Content-Type, you get a MIME-Version
732 if header.lower() == 'content-type':
733 del self['mime-version']
734 self['MIME-Version'] = '1.0'
735 if header not in self:
736 self[header] = type
737 return
738 params = self.get_params(header=header, unquote=requote)
739 del self[header]
740 self[header] = type
741 # Skip the first param; it's the old type.
742 for p, v in params[1:]:
743 self.set_param(p, v, header, requote)
744
745 def get_filename(self, failobj=None):
746 """Return the filename associated with the payload if present.
747
748 The filename is extracted from the Content-Disposition header's
749 `filename' parameter, and it is unquoted. If that header is missing
750 the `filename' parameter, this method falls back to looking for the
751 `name' parameter.
752 """
753 missing = object()
754 filename = self.get_param('filename', missing, 'content-disposition')
755 if filename is missing:
R. David Murraybf2e0aa2009-10-10 00:13:32 +0000756 filename = self.get_param('name', missing, 'content-type')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000757 if filename is missing:
758 return failobj
759 return utils.collapse_rfc2231_value(filename).strip()
760
761 def get_boundary(self, failobj=None):
762 """Return the boundary associated with the payload if present.
763
764 The boundary is extracted from the Content-Type header's `boundary'
765 parameter, and it is unquoted.
766 """
767 missing = object()
768 boundary = self.get_param('boundary', missing)
769 if boundary is missing:
770 return failobj
771 # RFC 2046 says that boundaries may begin but not end in w/s
772 return utils.collapse_rfc2231_value(boundary).rstrip()
773
774 def set_boundary(self, boundary):
775 """Set the boundary parameter in Content-Type to 'boundary'.
776
777 This is subtly different than deleting the Content-Type header and
778 adding a new one with a new boundary parameter via add_header(). The
779 main difference is that using the set_boundary() method preserves the
780 order of the Content-Type header in the original message.
781
782 HeaderParseError is raised if the message has no Content-Type header.
783 """
784 missing = object()
785 params = self._get_params_preserve(missing, 'content-type')
786 if params is missing:
787 # There was no Content-Type header, and we don't know what type
788 # to set it to, so raise an exception.
789 raise errors.HeaderParseError('No Content-Type header found')
790 newparams = []
791 foundp = False
792 for pk, pv in params:
793 if pk.lower() == 'boundary':
794 newparams.append(('boundary', '"%s"' % boundary))
795 foundp = True
796 else:
797 newparams.append((pk, pv))
798 if not foundp:
799 # The original Content-Type header had no boundary attribute.
800 # Tack one on the end. BAW: should we raise an exception
801 # instead???
802 newparams.append(('boundary', '"%s"' % boundary))
803 # Replace the existing Content-Type header with the new value
804 newheaders = []
805 for h, v in self._headers:
806 if h.lower() == 'content-type':
807 parts = []
808 for k, v in newparams:
809 if v == '':
810 parts.append(k)
811 else:
812 parts.append('%s=%s' % (k, v))
R David Murrayc27e5222012-05-25 15:01:48 -0400813 val = SEMISPACE.join(parts)
814 newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000815
816 else:
817 newheaders.append((h, v))
818 self._headers = newheaders
819
820 def get_content_charset(self, failobj=None):
821 """Return the charset parameter of the Content-Type header.
822
823 The returned string is always coerced to lower case. If there is no
824 Content-Type header, or if that header has no charset parameter,
825 failobj is returned.
826 """
827 missing = object()
828 charset = self.get_param('charset', missing)
829 if charset is missing:
830 return failobj
831 if isinstance(charset, tuple):
832 # RFC 2231 encoded, so decode it, and it better end up as ascii.
833 pcharset = charset[0] or 'us-ascii'
834 try:
835 # LookupError will be raised if the charset isn't known to
836 # Python. UnicodeError will be raised if the encoded text
837 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000838 as_bytes = charset[2].encode('raw-unicode-escape')
839 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000840 except (LookupError, UnicodeError):
841 charset = charset[2]
842 # charset characters must be in us-ascii range
843 try:
844 charset.encode('us-ascii')
845 except UnicodeError:
846 return failobj
847 # RFC 2046, $4.1.2 says charsets are not case sensitive
848 return charset.lower()
849
850 def get_charsets(self, failobj=None):
851 """Return a list containing the charset(s) used in this message.
852
853 The returned list of items describes the Content-Type headers'
854 charset parameter for this message and all the subparts in its
855 payload.
856
857 Each item will either be a string (the value of the charset parameter
858 in the Content-Type header of that part) or the value of the
859 'failobj' parameter (defaults to None), if the part does not have a
860 main MIME type of "text", or the charset is not defined.
861
862 The list will contain one string for each part of the message, plus
863 one for the container message (i.e. self), so that a non-multipart
864 message will still return a list of length 1.
865 """
866 return [part.get_content_charset(failobj) for part in self.walk()]
867
868 # I.e. def walk(self): ...
869 from email.iterators import walk