blob: f1ffcdb4de060278e22473d96514a82e24529a6d [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
R. David Murray92532142011-01-07 23:25:30 +000019from email import header
20from email import charset as _charset
21Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
23SEMISPACE = '; '
24
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000026# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000027tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029# How to figure out if we are processing strings that come from a byte
30# source with undecodable characters.
31_has_surrogates = re.compile(
32 '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
33
Guido van Rossum8b3febe2007-08-30 01:15:14 +000034
Guido van Rossum8b3febe2007-08-30 01:15:14 +000035# Helper functions
R. David Murray92532142011-01-07 23:25:30 +000036def _sanitize_header(name, value):
37 # If the header value contains surrogates, return a Header using
38 # the unknown-8bit charset to encode the bytes as encoded words.
R. David Murray96fd54e2010-10-08 15:55:28 +000039 if not isinstance(value, str):
R. David Murray92532142011-01-07 23:25:30 +000040 # Assume it is already a header object
R. David Murray96fd54e2010-10-08 15:55:28 +000041 return value
42 if _has_surrogates(value):
R. David Murray92532142011-01-07 23:25:30 +000043 return header.Header(value, charset=_charset.UNKNOWN8BIT,
44 header_name=name)
R. David Murray96fd54e2010-10-08 15:55:28 +000045 else:
46 return value
47
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000048def _splitparam(param):
49 # Split header parameters. BAW: this may be too simple. It isn't
50 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murraya2150232011-03-16 21:11:23 -040051 # found in the wild. We may eventually need a full fledged parser.
52 # RDM: we might have a Header here; for now just stringify it.
53 a, sep, b = str(param).partition(';')
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000054 if not sep:
55 return a.strip(), None
56 return a.strip(), b.strip()
57
Guido van Rossum8b3febe2007-08-30 01:15:14 +000058def _formatparam(param, value=None, quote=True):
59 """Convenience function to format and return a key=value pair.
60
R. David Murray7ec754b2010-12-13 23:51:19 +000061 This will quote the value if needed or if quote is true. If value is a
62 three tuple (charset, language, value), it will be encoded according
63 to RFC2231 rules. If it contains non-ascii characters it will likewise
64 be encoded according to RFC2231 rules, using the utf-8 charset and
65 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000066 """
67 if value is not None and len(value) > 0:
68 # A tuple is used for RFC 2231 encoded parameter values where items
69 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000070 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000071 if isinstance(value, tuple):
72 # Encode as per RFC 2231
73 param += '*'
74 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000075 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000076 else:
77 try:
78 value.encode('ascii')
79 except UnicodeEncodeError:
80 param += '*'
81 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000082 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000083 # BAW: Please check this. I think that if quote is set it should
84 # force quoting even if not necessary.
85 if quote or tspecials.search(value):
86 return '%s="%s"' % (param, utils.quote(value))
87 else:
88 return '%s=%s' % (param, value)
89 else:
90 return param
91
92def _parseparam(s):
R David Murraya2150232011-03-16 21:11:23 -040093 # RDM This might be a Header, so for now stringify it.
94 s = ';' + str(s)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000095 plist = []
96 while s[:1] == ';':
97 s = s[1:]
98 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000099 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000100 end = s.find(';', end + 1)
101 if end < 0:
102 end = len(s)
103 f = s[:end]
104 if '=' in f:
105 i = f.index('=')
106 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
107 plist.append(f.strip())
108 s = s[end:]
109 return plist
110
111
112def _unquotevalue(value):
113 # This is different than utils.collapse_rfc2231_value() because it doesn't
114 # try to convert the value to a unicode. Message.get_param() and
115 # Message.get_params() are both currently defined to return the tuple in
116 # the face of RFC 2231 parameters.
117 if isinstance(value, tuple):
118 return value[0], value[1], utils.unquote(value[2])
119 else:
120 return utils.unquote(value)
121
122
123
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        # Ordered list of (name, value) pairs; duplicates are allowed.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the Unix From_ envelope header to unixfrom."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the Unix From_ envelope header (None if never set)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.

        If the current payload is neither None nor a list, the append()
        call fails with whatever exception the payload object raises.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or
        one of the uuencode spellings, and bytes are returned.  If some
        other encoding is used, or the header is missing, or if the payload
        has bogus data (i.e. bogus base64 or uuencoded data), the payload
        is returned as-is (as bytes when it was a string).

        If the message is a multipart and the decode flag is True, then None
        is returned.

        Raises TypeError if i is given and the payload is not a list.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            return self._payload[i]
        # For backward compatibility, use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # cte might be a Header, so for now stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # Bytes form of the payload; stays None when there is nothing that
        # can be fed to a decoder (e.g. the payload is None).
        bpayload = None
        if isinstance(payload, str):
            if _has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(
                            self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        elif isinstance(payload, bytes):
            # payload may be bytes here; decode it directly.  Without this
            # the decoders below would reference an unbound name.
            bpayload = payload
        if not decode:
            return payload
        if bpayload is None:
            # Neither str nor bytes: nothing to decode, return as-is.
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            try:
                return base64.b64decode(bpayload)
            except binascii.Error:
                # Incorrect padding
                return bpayload
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        return bpayload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # The body encoding is first tried as a callable taking the
            # message; if calling it raises TypeError it is used as the
            # header value instead.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header named name (case-insensitive) exists."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, duplicates included."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [_sanitize_header(k, v) for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [(k, _sanitize_header(k, v)) for k, v in self._headers]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return _sanitize_header(k, v)
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [_sanitize_header(k, v) for k, v in self._headers
                  if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            key = k.replace('_', '-')
            if v is None:
                parts.append(key)
            else:
                parts.append(_formatparam(key, v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
        The parameter value (either the returned string, or the VALUE item in
        the 3-tuple) is always unquoted, unless unquote is set to False.

        If your application doesn't care whether the parameter was RFC 2231
        encoded, it can turn the return value into a string as follows:

            rawparam = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(rawparam)

        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present (or present with a false value): append.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header, substituting the new value in place.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError(
                'type must be of the form maintype/subtype, got %r' % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter on the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Build the replacement value once; bare attributes (empty value)
        # are emitted without an `=' sign.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        new_value = SEMISPACE.join(parts)
        # Replace the existing Content-Type header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, new_value))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk