blob: d2483cacf69a9850d79655e9cd375ac157f18f05 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
Guido van Rossum9604e662007-08-30 03:46:43 +000019from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21SEMISPACE = '; '
22
Guido van Rossum8b3febe2007-08-30 01:15:14 +000023# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000024# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027# How to figure out if we are processing strings that come from a byte
28# source with undecodable characters.
29_has_surrogates = re.compile(
30 '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
31
Guido van Rossum8b3febe2007-08-30 01:15:14 +000032
Guido van Rossum8b3febe2007-08-30 01:15:14 +000033# Helper functions
def _sanitize_surrogates(value):
    # Used to sanitize header values before letting them escape as strings.
    # Anything that is not a str (e.g. a Header object) is passed through
    # untouched, as is a str without surrogate-escaped bytes.  Otherwise the
    # original bytes are recovered and re-decoded as ASCII, so that every
    # non-ascii byte ends up as a '?'.
    if isinstance(value, str) and _has_surrogates(value):
        raw = value.encode('ascii', 'surrogateescape')
        text = raw.decode('ascii', 'replace')
        return text.replace('\ufffd', '?')
    return value
46
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000047def _splitparam(param):
48 # Split header parameters. BAW: this may be too simple. It isn't
49 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
50 # found in the wild. We may eventually need a full fledged parser
51 # eventually.
52 a, sep, b = param.partition(';')
53 if not sep:
54 return a.strip(), None
55 return a.strip(), b.strip()
56
Guido van Rossum8b3febe2007-08-30 01:15:14 +000057def _formatparam(param, value=None, quote=True):
58 """Convenience function to format and return a key=value pair.
59
R. David Murray7ec754b2010-12-13 23:51:19 +000060 This will quote the value if needed or if quote is true. If value is a
61 three tuple (charset, language, value), it will be encoded according
62 to RFC2231 rules. If it contains non-ascii characters it will likewise
63 be encoded according to RFC2231 rules, using the utf-8 charset and
64 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000065 """
66 if value is not None and len(value) > 0:
67 # A tuple is used for RFC 2231 encoded parameter values where items
68 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000069 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000070 if isinstance(value, tuple):
71 # Encode as per RFC 2231
72 param += '*'
73 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000074 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000075 else:
76 try:
77 value.encode('ascii')
78 except UnicodeEncodeError:
79 param += '*'
80 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000081 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000082 # BAW: Please check this. I think that if quote is set it should
83 # force quoting even if not necessary.
84 if quote or tspecials.search(value):
85 return '%s="%s"' % (param, utils.quote(value))
86 else:
87 return '%s=%s' % (param, value)
88 else:
89 return param
90
91def _parseparam(s):
92 plist = []
93 while s[:1] == ';':
94 s = s[1:]
95 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000096 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000097 end = s.find(';', end + 1)
98 if end < 0:
99 end = len(s)
100 f = s[:end]
101 if '=' in f:
102 i = f.index('=')
103 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
104 plist.append(f.strip())
105 s = s[end:]
106 return plist
107
108
109def _unquotevalue(value):
110 # This is different than utils.collapse_rfc2231_value() because it doesn't
111 # try to convert the value to a unicode. Message.get_param() and
112 # Message.get_params() are both currently defined to return the tuple in
113 # the face of RFC 2231 parameters.
114 if isinstance(value, tuple):
115 return value[0], value[1], utils.unquote(value[2])
116 else:
117 return utils.unquote(value)
118
119
120
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        # Headers are kept as an ordered list of (name, value) pairs so that
        # duplicates and the original order survive.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.  Optional `maxheaderlen' is passed on to the Generator.

        This is a convenience method and may not generate the message exactly
        as you intend; in particular the Generator is created with
        mangle_from_=False, so lines that begin with "From " are not
        mangled.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the message's Unix From_ envelope header to unixfrom."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the message's Unix From_ envelope header (None if unset)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or one
        of the uuencode spellings.  If some other encoding is used, or the
        header is missing, or if the payload has bogus data (i.e. bogus
        base64 or uuencoded data), the payload is returned as-is (as bytes
        when it was a string).  A payload that is neither a string nor bytes
        is always returned unchanged.

        If the message is a multipart and the decode flag is True, then None
        is returned.

        TypeError is raised if i is given and the message is not a multipart.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, Use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # The stored header value may be a Header object rather than a str,
        # so coerce it before lower-casing.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        if isinstance(payload, str):
            if _has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(
                            self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        elif isinstance(payload, bytes):
            # The payload can already be bytes; decode those directly.
            # Without this binding the decode branches below would fail
            # with an unbound local name.
            bpayload = payload
        elif decode:
            # Neither str nor bytes (e.g. None): there is nothing to decode.
            return payload
        if not decode:
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            try:
                return base64.b64decode(bpayload)
            except binascii.Error:
                # Incorrect padding
                return bpayload
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # Unknown or missing encoding: hand back the bytes form as-is.
        return bpayload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # The body encoding may be a callable that encodes the
                # message in place ...
                cte(self)
            except TypeError:
                # ... otherwise treat it as the header value to record.
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header named name (case-insensitive) exists."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, in order, with duplicates."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [_sanitize_surrogates(v) for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [(k, _sanitize_surrogates(v)) for k, v in self._headers]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return _sanitize_surrogates(v)
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [_sanitize_surrogates(v) for k, v in self._headers
                  if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(';' + value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a string like so:

            param = msg.get_param('foo')
            if isinstance(param, tuple):
                param = email.utils.collapse_rfc2231_value(param)

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present yet: append it to the existing value.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header, substituting the new value in place.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be of the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Replace the existing Content-Type header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                parts = []
                for pname, pvalue in newparams:
                    if pvalue == '':
                        parts.append(pname)
                    else:
                        parts.append('%s=%s' % (pname, pvalue))
                newheaders.append((h, SEMISPACE.join(parts)))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk