blob: 8d68c093cc95f608bc269d67a8f9ba313e6ffcff [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
Guido van Rossum9604e662007-08-30 03:46:43 +000019from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21SEMISPACE = '; '
22
Guido van Rossum8b3febe2007-08-30 01:15:14 +000023# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000024# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027# How to figure out if we are processing strings that come from a byte
28# source with undecodable characters.
29_has_surrogates = re.compile(
30 '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
31
Guido van Rossum8b3febe2007-08-30 01:15:14 +000032
Guido van Rossum8b3febe2007-08-30 01:15:14 +000033# Helper functions
R. David Murray96fd54e2010-10-08 15:55:28 +000034def _sanitize_surrogates(value):
35 # If the value contains surrogates, re-decode and replace the original
36 # non-ascii bytes with '?'s. Used to sanitize header values before letting
37 # them escape as strings.
38 if not isinstance(value, str):
39 # Header object
40 return value
41 if _has_surrogates(value):
42 original_bytes = value.encode('ascii', 'surrogateescape')
43 return original_bytes.decode('ascii', 'replace').replace('\ufffd', '?')
44 else:
45 return value
46
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000047def _splitparam(param):
48 # Split header parameters. BAW: this may be too simple. It isn't
49 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
50 # found in the wild. We may eventually need a full fledged parser
51 # eventually.
52 a, sep, b = param.partition(';')
53 if not sep:
54 return a.strip(), None
55 return a.strip(), b.strip()
56
Guido van Rossum8b3febe2007-08-30 01:15:14 +000057def _formatparam(param, value=None, quote=True):
58 """Convenience function to format and return a key=value pair.
59
R. David Murray7ec754b2010-12-13 23:51:19 +000060 This will quote the value if needed or if quote is true. If value is a
61 three tuple (charset, language, value), it will be encoded according
62 to RFC2231 rules. If it contains non-ascii characters it will likewise
63 be encoded according to RFC2231 rules, using the utf-8 charset and
64 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000065 """
66 if value is not None and len(value) > 0:
67 # A tuple is used for RFC 2231 encoded parameter values where items
68 # are (charset, language, value). charset is a string, not a Charset
69 # instance.
70 if isinstance(value, tuple):
71 # Encode as per RFC 2231
72 param += '*'
73 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray7ec754b2010-12-13 23:51:19 +000074 else:
75 try:
76 value.encode('ascii')
77 except UnicodeEncodeError:
78 param += '*'
79 value = utils.encode_rfc2231(value, 'utf-8', '')
Guido van Rossum8b3febe2007-08-30 01:15:14 +000080 # BAW: Please check this. I think that if quote is set it should
81 # force quoting even if not necessary.
82 if quote or tspecials.search(value):
83 return '%s="%s"' % (param, utils.quote(value))
84 else:
85 return '%s=%s' % (param, value)
86 else:
87 return param
88
89def _parseparam(s):
90 plist = []
91 while s[:1] == ';':
92 s = s[1:]
93 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000094 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000095 end = s.find(';', end + 1)
96 if end < 0:
97 end = len(s)
98 f = s[:end]
99 if '=' in f:
100 i = f.index('=')
101 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
102 plist.append(f.strip())
103 s = s[end:]
104 return plist
105
106
107def _unquotevalue(value):
108 # This is different than utils.collapse_rfc2231_value() because it doesn't
109 # try to convert the value to a unicode. Message.get_param() and
110 # Message.get_params() are both currently defined to return the tuple in
111 # the face of RFC 2231 parameters.
112 if isinstance(value, tuple):
113 return value[0], value[1], utils.unquote(value[2])
114 else:
115 return utils.unquote(value)
116
117
118
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        # Headers are kept as a list of (name, value) tuples so that order
        # and duplicates are preserved.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.

        This is as_string() with its default arguments, so the Unix From_
        envelope header is not included.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.  Optional `maxheaderlen' is passed on to the Generator.

        This is a convenience method and may not generate the message exactly
        as you intend: in particular the Generator is created with
        mangle_from_=False, so lines that begin with "From " are not
        mangled.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level, as in the original code.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the message's envelope (Unix From_) header to unixfrom."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the envelope (Unix From_) header, or None if never set."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload; a TypeError is raised if i is
        given and the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or
        one of the uuencode names.  If some other encoding is used, or the
        header is missing, or if the payload has bogus data (i.e. bogus
        base64 or uuencoded data), the payload is returned as-is (as bytes
        when it was a string).  A payload that is neither a string nor bytes
        (e.g. None) is returned unchanged.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            return self._payload[i]
        # For backward compatibility, use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # The stored header value may be a Header object rather than a str,
        # so stringify it before normalizing the case.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # bpayload is the bytes form handed to the decoders below.  It stays
        # None when there is nothing that can be decoded.
        bpayload = None
        if isinstance(payload, str):
            if _has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    # get_content_charset() copes with RFC 2231 encoded
                    # charset parameters, which get_param() would hand back
                    # as a tuple that bytes.decode() cannot accept.
                    try:
                        payload = bpayload.decode(
                            self.get_content_charset('ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        elif isinstance(payload, bytes):
            # The payload can already be bytes; decode it directly.
            bpayload = payload
        if not decode or bpayload is None:
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            try:
                return base64.b64decode(bpayload)
            except binascii.Error:
                # Incorrect padding
                return bpayload
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # Unknown or missing encoding: hand back the undecoded bytes.
        return bpayload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # cte is first tried as a callable taking the message; if calling
            # it raises TypeError it is used as the header value instead.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header called name (case-insensitive) exists."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, including duplicates."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [_sanitize_surrogates(v) for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [(k, _sanitize_surrogates(v)) for k, v in self._headers]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return _sanitize_surrogates(v)
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [_sanitize_surrogates(v) for k, v in self._headers
                  if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(';' + value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a string like so:

            param = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(param)

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present yet: append it to the existing value.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Parameter present: rebuild the header, swapping in the new
            # value at the position of the old one.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be in the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Build the replacement header value once; bare attributes (empty
        # value) are written without an `=' sign.
        new_value = SEMISPACE.join(
            pk if pv == '' else '%s=%s' % (pk, pv) for pk, pv in newparams)
        # Replace the existing Content-Type header with the new value,
        # leaving every other header where it is.
        self._headers = [
            (h, new_value) if h.lower() == 'content-type' else (h, v)
            for h, v in self._headers]

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.  failobj is also returned when the charset
        contains characters outside the us-ascii range.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk