blob: 922617adbbba34b983904e4bee349b21521f7b5f [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
R. David Murray92532142011-01-07 23:25:30 +000019from email import header
20from email import charset as _charset
21Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')

# How to figure out if we are processing strings that come from a byte
# source with undecodable characters.
# The pattern is intentionally NOT a raw string: Python itself expands the
# \uXXXX escapes into the surrogate code points.  Because of that, the regex
# anchors must be written with doubled backslashes (\\A, \\Z); a single
# backslash there is an invalid string escape sequence.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search
33
Guido van Rossum8b3febe2007-08-30 01:15:14 +000034
Guido van Rossum8b3febe2007-08-30 01:15:14 +000035# Helper functions
def _sanitize_header(name, value):
    """Return value, wrapped in a Header if it contains surrogates.

    A str value containing surrogate escapes is turned into a Header that
    uses the unknown-8bit charset, so that the bytes end up as encoded
    words.  Every other value is returned unchanged.
    """
    # Anything that is not a str is assumed to already be a header object,
    # so only str values are inspected for surrogates.
    if isinstance(value, str) and _has_surrogates(value):
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)
    return value
47
def _splitparam(param):
    """Split a header value into (main value, parameter string or None)."""
    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    # compliant, but it catches most headers found in the wild.  We may
    # eventually need a full fledged parser.
    # RDM: we might have a Header here; for now just stringify it.
    head, found, tail = str(param).partition(';')
    head = head.strip()
    if found:
        return head, tail.strip()
    # No semicolon at all: there is no parameter section.
    return head, None
57
def _formatparam(param, value=None, quote=True):
    """Build the text of a single header parameter.

    With no value (None or empty) only the parameter name is returned.
    A three tuple (charset, language, value) is encoded according to RFC
    2231 rules.  A value containing non-ascii characters is likewise
    RFC 2231 encoded, using the utf-8 charset and a null language.
    Otherwise the result is key=value, with the value quoted when quote is
    true or when it contains special characters.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.  RFC 2231 encoded values are never quoted, per RFC.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        # Non-ascii text: fall back to RFC 2231 with utf-8.
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    needs_quotes = quote or tspecials.search(value)
    if not needs_quotes:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
91
def _parseparam(s):
    """Split a header value on semicolons, honouring double-quoted strings.

    Returns the list of stripped pieces.  For pieces of the form key=value
    the key is lower-cased and whitespace around the `=' is removed.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # A semicolon preceded by an odd number of unescaped double quotes
        # sits inside a quoted string; keep looking for the next one.
        while cut > 0:
            quotes = remaining.count('"', 0, cut)
            escaped = remaining.count('\\"', 0, cut)
            if (quotes - escaped) % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces
110
111
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples."""
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)
121
122
123
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        # List of (name, value) tuples, in the order the headers were added.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Store the given Unix From_ envelope header."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the stored Unix From_ envelope header (None by default)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, or if the
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
        payload is returned as-is.  A payload that is already a bytes object
        is decoded directly; a payload that is neither str nor bytes (for
        example None) is returned unchanged.

        If the message is a multipart and the decode flag is True, then None
        is returned.

        A TypeError is raised if i is given and the payload is not a list.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, Use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # cte might be a Header, so for now stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # bpayload is the bytes form of the payload used for decoding.  It
        # stays None when the payload is neither str nor bytes.
        bpayload = None
        if isinstance(payload, str):
            if _has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(
                            self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        elif isinstance(payload, bytes):
            # The payload is already bytes: decode it directly.  Without
            # this, the decode branches below referenced an unassigned name.
            bpayload = payload
        if not decode:
            return payload
        if bpayload is None:
            # Nothing that can be decoded; hand the payload back as-is.
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            try:
                return base64.b64decode(bpayload)
            except binascii.Error:
                # Incorrect padding
                return bpayload
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # Unknown or missing transfer encoding: return the bytes unchanged.
        return bpayload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # cte is first tried as a callable taking the message; if calling
            # it raises TypeError it is used as the header value instead.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header with this name (any case) is present."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, including duplicates."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [_sanitize_header(k, v) for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [(k, _sanitize_header(k, v)) for k, v in self._headers]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return _sanitize_header(k, v)
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [_sanitize_header(k, v) for k, v in self._headers
                  if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a string like so:

            param = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(param)

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present (or empty): append it to the header.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Parameter present: rebuild the header, swapping in the new
            # value at the position of the old one.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted_boundary = '"%s"' % boundary
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted_boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted_boundary))
        # Build the new header value once; bare attributes (empty value) are
        # written without an `=' sign.
        new_value = SEMISPACE.join(
            [pk if pv == '' else '%s=%s' % (pk, pv) for pk, pv in newparams])
        # Replace the existing Content-Type header with the new value
        self._headers = [
            (h, new_value) if h.lower() == 'content-type' else (h, v)
            for h, v in self._headers]

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk