blob: afe350c902c8d74bfda191038bd3e8aa84460b70 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from io import BytesIO, StringIO
14
15# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from email import utils
17from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040018from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000019from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040020from email._encoded_words import decode_b
Charset = _charset.Charset

SEMISPACE = '; '

# Regular expression matching the `special' characters in parameters; the
# presence of any of them forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple (value, rest), both stripped of surrounding
    whitespace.  rest is None when the value contains no semicolon.
    """
    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    # compliant, but it catches most headers found in the wild.  We may
    # eventually need a full fledged parser.
    # RDM: we might have a Header here; for now just stringify it.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
39
def _formatparam(param, value=None, quote=True):
    """Format and return a single header parameter.

    With no value (None or empty) just the parameter name is returned.
    A three tuple (charset, language, value) is encoded according to
    RFC 2231 rules.  A string containing non-ascii characters is likewise
    RFC 2231 encoded, using the utf-8 charset and a null language.  Any
    other value is emitted as key=value, with the value quoted when quote
    is true or when it contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.  RFC 2231 encoded values are never quoted, per RFC.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
73
def _parseparam(s):
    """Split a parameterized header value into its ';'-separated fields.

    A semicolon that falls inside a double-quoted string does not end a
    field.  For fields of the form key=value the key is lower-cased and
    the whitespace on both sides of the first '=' is removed.  Returns the
    list of stripped fields.
    """
    # RDM: this might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    fields = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        stop = remaining.find(';')
        # An odd number of unescaped double quotes before the candidate
        # semicolon means it sits inside a quoted string; try the next one.
        while stop > 0 and (remaining.count('"', 0, stop)
                            - remaining.count('\\"', 0, stop)) % 2:
            stop = remaining.find(';', stop + 1)
        if stop < 0:
            stop = len(remaining)
        field = remaining[:stop]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        fields.append(field.strip())
        remaining = remaining[stop:]
    return fields
92
93
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    For a tuple only the third item is unquoted; the first two items are
    returned untouched.  Any other value is passed through utils.unquote().
    """
    # This is different than utils.collapse_rfc2231_value() because it
    # doesn't try to convert the value to a unicode.  Message.get_param()
    # and Message.get_params() are both currently defined to return the
    # tuple in the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
103
104
105
106class Message:
107 """Basic message object.
108
109 A message object is defined as something that has a bunch of RFC 2822
110 headers and a payload. It may optionally have an envelope header
111 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
112 multipart or a message/rfc822), then the payload is a list of Message
113 objects, otherwise it is a string.
114
115 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000116 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000117 do in fact appear multiple times (e.g. Received) and for those headers,
118 you must use the explicit API to set or get all the headers. Not all of
119 the mapping methods are implemented.
120 """
def __init__(self, policy=compat32):
    """Create an empty message governed by the given policy.

    The new message has no headers, no Unix From_ line, no payload and no
    charset; its default content type is text/plain.
    """
    self.policy = policy
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
132
def __str__(self):
    """Return the whole formatted message as a string.

    This is exactly what as_string() returns when called with its default
    arguments.
    """
    text = self.as_string()
    return text
138
def as_string(self, unixfrom=False, maxheaderlen=0):
    """Return the entire formatted message as a string.

    When optional `unixfrom' is True the Unix From_ envelope header is
    included.  `maxheaderlen' is handed to the Generator unchanged.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level (presumably to avoid an
    # import cycle with email.generator).
    from email.generator import Generator
    buffer = StringIO()
    generator = Generator(buffer, mangle_from_=False,
                          maxheaderlen=maxheaderlen)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
153
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
157
158 #
159 # Unix From_ line
160 #
def set_unixfrom(self, unixfrom):
    """Store `unixfrom' as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
163
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    envelope = self._unixfrom
    return envelope
166
167 #
168 # Payload manipulation.
169 #
def attach(self, payload):
    """Append the given payload to the current payload.

    If there is no payload yet, the payload becomes a one-element list;
    otherwise the object is appended to the existing payload.  Use
    set_payload() instead if you want to set the payload to a scalar
    object.
    """
    current = self._payload
    if current is not None:
        current.append(payload)
        return
    self._payload = [payload]
181
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names.  If some other encoding is used, or the header
    is missing, the payload is returned as bytes without further
    decoding; if uuencoded data is bogus the undecoded bytes are returned.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given but the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here.  Start from the payload itself so that a
    # non-str payload reaches the decoders below instead of leaving
    # bpayload unbound.
    bpayload = payload
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    # Unknown charset name; fall back to ascii.
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII codepoints in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    if not decode:
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be
        # factored out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000271
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    A payload with an encode() method (a string) is stored unchanged when
    no charset is given; with a charset it is first encoded to that
    charset's output charset.  A payload with a decode() method (bytes) is
    stored decoded as ascii with the surrogateescape error handler.  Any
    other object is stored as is.

    Optional charset sets the message's default character set.  See
    set_charset() for details.
    """
    if hasattr(payload, 'encode'):
        if charset is None:
            self._payload = payload
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    is_bytes_like = hasattr(payload, 'decode')
    self._payload = (payload.decode('ascii', 'surrogateescape')
                     if is_bytes_like else payload)
    if charset is not None:
        self.set_charset(charset)
291
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    cte = charset.get_body_encoding()
    try:
        # Try cte as an encoder that is applied to the message itself.
        cte(self)
    except TypeError:
        # Otherwise encode the body here and record cte as the header
        # value (presumably cte is then the name of an encoding).
        # The 'if' below is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the message is
        # serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', cte)
338
def get_charset(self):
    """Return the Charset instance stored for the message's payload.

    This is None when no charset is set.
    """
    charset = self._charset
    return charset
343
344 #
345 # MAPPING INTERFACE (partial)
346 #
def __len__(self):
    """Return the number of stored headers; duplicates are counted."""
    headers = self._headers
    return len(headers)
350
def __getitem__(self, name):
    """Get a header value.

    A missing header yields None rather than an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
361
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError when the policy limits how many headers of this
    name a message may have and that limit is already reached.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        seen = 0
        for field, _ in self._headers:
            if field.lower() != wanted:
                continue
            seen += 1
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000379
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Field names are compared case insensitively.  A missing header is not
    an error.
    """
    unwanted = name.lower()
    self._headers = [(field, value) for field, value in self._headers
                     if field.lower() != unwanted]
391
def __contains__(self, name):
    """Return True if a header named `name' exists (case insensitive)."""
    wanted = name.lower()
    return any(field.lower() == wanted for field, _ in self._headers)
394
def __iter__(self):
    """Iterate over the header field names, in stored order."""
    for name, _ in self._headers:
        yield name
398
def keys(self):
    """Return a list of all the message's header field names.

    The names come back in the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.
    """
    return [name for name, _ in self._headers]
408
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    The values come back in the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    return [fetch(name, raw) for name, raw in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000419
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs in which each value has been
    passed through the policy's header_fetch_parse().  The pairs come back
    in the order the headers appeared in the original message or were
    added to it, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    return [(name, fetch(name, raw)) for name, raw in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000430
def get(self, name, failobj=None):
    """Get a header value.

    The first header whose name matches (case insensitively) is returned,
    after being passed through the policy's header_fetch_parse().  Like
    __getitem__() but returns failobj instead of None when the field is
    missing.
    """
    wanted = name.lower()
    for field, raw in self._headers:
        if field.lower() == wanted:
            return self.policy.header_fetch_parse(field, raw)
    return failobj
442
443 #
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code).
446 #
447
def set_raw(self, name, value):
    """Append the (name, value) pair to the headers without modification.

    This is an "internal" API, intended only for use by a parser.
    """
    pair = (name, value)
    self._headers.append(pair)
454
def raw_items(self):
    """Return an iterator over the unmodified (name, value) header pairs.

    The iterator walks a copy of the header list, so headers added or
    removed afterwards are not reflected in it.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
461
462 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000463 # Additional useful stuff
464 #
465
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case insensitively and each value is passed through
    the policy's header_fetch_parse().  The values are in the order the
    headers are stored, and may contain duplicates.  Any fields deleted
    and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    fetch = self.policy.header_fetch_parse
    matches = [fetch(field, raw) for field, raw in self._headers
               if field.lower() == wanted]
    return matches or failobj
483
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its primary value (left
    out of the header when None).  Keyword arguments set additional
    parameters for the header field, with underscores in their names
    converted to dashes.  Normally a parameter is added as key="value"
    unless its value is None, in which case only the key is added.  If a
    parameter value contains non-ASCII characters it can be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.  Otherwise it will be encoded
    using the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        pieces.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000513
def replace_header(self, _name, _value):
    """Replace a header.

    Only the first header whose name matches (case insensitively) is
    replaced; its position and the case of its stored name are retained.
    If no matching header was found, a KeyError carrying the lower-cased
    name is raised.
    """
    wanted = _name.lower()
    for index, (field, _) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(field,
                                                                  _value)
            return
    raise KeyError(wanted)
528
529 #
530 # Use these three methods instead of the three above.
531 #
532
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype, _ = _splitparam(header)
    ctype = ctype.lower()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
556
def get_content_maintype(self):
    """Return the message's main content type.

    This is the part before the '/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
565
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the part after the '/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
574
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
583
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is only remembered on the message; the
    Content-Type header is not touched.
    """
    self._default_type = ctype
592
def _get_params_preserve(self, failobj, header):
    """Return the header's parameters as (name, value) pairs.

    Like get_params() but preserves the quoting of values.  Returns
    failobj when the header is missing.  A bare attribute (one with no
    `=' sign) is given the empty string as its value before the list is
    handed to utils.decode_params().
    """
    # BAW: should this be part of the public interface?
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for field in _parseparam(raw):
        name, equals, val = field.partition('=')
        if equals:
            pairs.append((name.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((field.strip(), ''))
    return utils.decode_params(pairs)
613
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    pairs = self._get_params_preserve(sentinel, header)
    if pairs is sentinel:
        return failobj
    if not unquote:
        return pairs
    return [(name, _unquotevalue(val)) for name, val in pairs]
635
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
669
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.
    """
    def extend(so_far, piece):
        # Join with '; ' unless nothing has been accumulated yet.
        return SEMISPACE.join([so_far, piece]) if so_far else piece

    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        current = 'text/plain'
    else:
        current = self.get(header)
    if self.get_param(param, header=header):
        # The parameter is present with a non-empty value: rebuild the
        # whole header, substituting the new value for the old one.
        wanted = param.lower()
        rebuilt = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == wanted:
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_param, old_value, requote)
            rebuilt = extend(rebuilt, piece)
    else:
        rebuilt = extend(current, _formatparam(param, value, requote))
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
717
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header will be re-written in place without the parameter or its
    value.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens when the header is missing.
    """
    if header not in self:
        return
    unwanted = param.lower()
    kept = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == unwanted:
            continue
        piece = _formatparam(name, val, requote)
        kept = SEMISPACE.join([kept, piece]) if kept else piece
    if kept != self.get(header):
        del self[header]
        self[header] = kept
739
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError(
            'type must be of the form "maintype/subtype", got %r' % (type,))
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    # Capture the parameters before the header is replaced.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
771
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter of the Content-Type header.  failobj is returned
    when neither parameter is found.
    """
    sentinel = object()
    found = self.get_param('filename', sentinel, 'content-disposition')
    if found is sentinel:
        found = self.get_param('name', sentinel, 'content-type')
        if found is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(found).strip()
787
788 def get_boundary(self, failobj=None):
789 """Return the boundary associated with the payload if present.
790
791 The boundary is extracted from the Content-Type header's `boundary'
792 parameter, and it is unquoted.
793 """
794 missing = object()
795 boundary = self.get_param('boundary', missing)
796 if boundary is missing:
797 return failobj
798 # RFC 2046 says that boundaries may begin but not end in w/s
799 return utils.collapse_rfc2231_value(boundary).rstrip()
800
801 def set_boundary(self, boundary):
802 """Set the boundary parameter in Content-Type to 'boundary'.
803
804 This is subtly different than deleting the Content-Type header and
805 adding a new one with a new boundary parameter via add_header(). The
806 main difference is that using the set_boundary() method preserves the
807 order of the Content-Type header in the original message.
808
809 HeaderParseError is raised if the message has no Content-Type header.
810 """
811 missing = object()
812 params = self._get_params_preserve(missing, 'content-type')
813 if params is missing:
814 # There was no Content-Type header, and we don't know what type
815 # to set it to, so raise an exception.
816 raise errors.HeaderParseError('No Content-Type header found')
817 newparams = []
818 foundp = False
819 for pk, pv in params:
820 if pk.lower() == 'boundary':
821 newparams.append(('boundary', '"%s"' % boundary))
822 foundp = True
823 else:
824 newparams.append((pk, pv))
825 if not foundp:
826 # The original Content-Type header had no boundary attribute.
827 # Tack one on the end. BAW: should we raise an exception
828 # instead???
829 newparams.append(('boundary', '"%s"' % boundary))
830 # Replace the existing Content-Type header with the new value
831 newheaders = []
832 for h, v in self._headers:
833 if h.lower() == 'content-type':
834 parts = []
835 for k, v in newparams:
836 if v == '':
837 parts.append(k)
838 else:
839 parts.append('%s=%s' % (k, v))
R David Murrayc27e5222012-05-25 15:01:48 -0400840 val = SEMISPACE.join(parts)
841 newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000842
843 else:
844 newheaders.append((h, v))
845 self._headers = newheaders
846
847 def get_content_charset(self, failobj=None):
848 """Return the charset parameter of the Content-Type header.
849
850 The returned string is always coerced to lower case. If there is no
851 Content-Type header, or if that header has no charset parameter,
852 failobj is returned.
853 """
854 missing = object()
855 charset = self.get_param('charset', missing)
856 if charset is missing:
857 return failobj
858 if isinstance(charset, tuple):
859 # RFC 2231 encoded, so decode it, and it better end up as ascii.
860 pcharset = charset[0] or 'us-ascii'
861 try:
862 # LookupError will be raised if the charset isn't known to
863 # Python. UnicodeError will be raised if the encoded text
864 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000865 as_bytes = charset[2].encode('raw-unicode-escape')
866 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000867 except (LookupError, UnicodeError):
868 charset = charset[2]
869 # charset characters must be in us-ascii range
870 try:
871 charset.encode('us-ascii')
872 except UnicodeError:
873 return failobj
874 # RFC 2046, $4.1.2 says charsets are not case sensitive
875 return charset.lower()
876
877 def get_charsets(self, failobj=None):
878 """Return a list containing the charset(s) used in this message.
879
880 The returned list of items describes the Content-Type headers'
881 charset parameter for this message and all the subparts in its
882 payload.
883
884 Each item will either be a string (the value of the charset parameter
885 in the Content-Type header of that part) or the value of the
886 'failobj' parameter (defaults to None), if the part does not have a
887 main MIME type of "text", or the charset is not defined.
888
889 The list will contain one string for each part of the message, plus
890 one for the container message (i.e. self), so that a non-multipart
891 message will still return a list of length 1.
892 """
893 return [part.get_content_charset(failobj) for part in self.walk()]
894
895 # I.e. def walk(self): ...
896 from email.iterators import walk