blob: 935061515b533133b2dad4a7297e275e5fd203a2 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011from io import BytesIO, StringIO
12
13# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014from email import utils
15from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040016from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000017from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040018from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000019Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
# Separator used when joining a header value with its formatted parameters.
SEMISPACE = '; '

# Regular expression that matches the `special' characters in parameters;
# the presence of any of them forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns a 2-tuple (value, rest), both stripped of surrounding
    whitespace.  rest is None when the header contains no ';' at all.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    # RDM: we might have a Header here; for now just stringify it.
    text = str(param)
    head, semicolon, tail = text.partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
37
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    param is the parameter name.  When value is None or empty, the bare
    name is returned.  Otherwise:

    * A three-tuple (charset, language, value) is encoded according to
      RFC 2231 rules and returned as name*=encoded.  charset is a string,
      not a Charset instance.  RFC 2231 encoded values are never quoted.
    * A string containing non-ascii characters is likewise RFC 2231
      encoded, using the utf-8 charset and a null language.
    * Any other string is returned as name="value" if quote is true or
      the value contains one of the tspecials, else as name=value.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
71
def _parseparam(s):
    """Split a header value into a list of its ';'-separated fields.

    Semicolons that occur inside a double-quoted section are not treated
    as separators.  For fields of the form name=value the name is
    lower-cased and whitespace around the name and the value is removed.
    Every field is stripped before being added to the result.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    fields = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped quotes before the candidate ';' means
        # we are inside a quoted string; move on to the next ';'.
        while cut > 0:
            quotes = (remaining.count('"', 0, cut)
                      - remaining.count('\\"', 0, cut))
            if quotes % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        name, equals, val = field.partition('=')
        if equals:
            field = name.strip().lower() + '=' + val.strip()
        fields.append(field.strip())
        remaining = remaining[cut:]
    return fields
90
91
def _unquotevalue(value):
    """Unquote a parameter value, preserving the RFC 2231 tuple form.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # Only the third item (the value proper) is unquoted.
    return value[0], value[1], utils.unquote(value[2])
101
102
103
104class Message:
105 """Basic message object.
106
107 A message object is defined as something that has a bunch of RFC 2822
108 headers and a payload. It may optionally have an envelope header
109 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
110 multipart or a message/rfc822), then the payload is a list of Message
111 objects, otherwise it is a string.
112
113 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000114 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115 do in fact appear multiple times (e.g. Received) and for those headers,
116 you must use the explicit API to set or get all the headers. Not all of
117 the mapping methods are implemented.
118 """
def __init__(self, policy=compat32):
    """Create an empty message governed by the given policy.

    The new message has no headers, no Unix From_ line, no payload and no
    charset.  policy defaults to compat32.
    """
    self.policy = policy
    # Header (name, value) pairs, in insertion order.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
130
def __str__(self):
    """Return the entire formatted message as a string.

    Equivalent to calling as_string() with its default arguments.
    """
    text = self.as_string()
    return text
135
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons, if maxheaderlen is
    not specified it defaults to 0, so you must override it explicitly
    if you want a different maxheaderlen.  'policy' is passed to the
    Generator instance used to serialize the message; if it is not
    specified the policy associated with the message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
159
def __bytes__(self):
    """Return the entire formatted message as a bytes object.

    Equivalent to calling as_bytes() with its default arguments.
    """
    data = self.as_bytes()
    return data
164
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; if not specified the policy associated with
    the message instance is used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
179
def is_multipart(self):
    """Return True if the message consists of multiple parts.

    A message counts as multipart exactly when its payload is a list.
    """
    payload = self._payload
    return isinstance(payload, list)
183
184 #
185 # Unix From_ line
186 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
189
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    envelope = self._unixfrom
    return envelope
192
193 #
194 # Payload manipulation.
195 #
def attach(self, payload):
    """Add the given payload to the current payload.

    The current payload will always be a list of objects after this method
    is called.  If you want to set the payload to a scalar object, use
    set_payload() instead.

    Raises TypeError if the message already has a payload that cannot be
    appended to (for example a string set via set_payload()).
    """
    if self._payload is None:
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        # A scalar payload has no append(); report the real problem rather
        # than leaking an AttributeError about the payload's type.
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
207
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; it raises TypeError if the
    payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded (to bytes) if this header's value is `quoted-printable',
    `base64' or one of the uuencode names.  If some other encoding is
    used, or the header is missing, the payload is returned as bytes
    without further decoding; if uuencoded data is bogus the undecoded
    bytes are returned.  A payload that is neither a string nor bytes
    (e.g. None) is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    if not decode:
        if isinstance(payload, str) and utils._has_surrogates(payload):
            # Surrogate-escaped bytes: recover the bytes and decode them
            # using the declared charset.
            try:
                bpayload = payload.encode('ascii', 'surrogateescape')
            except UnicodeEncodeError:
                # The string mixes escaped bytes with genuine non-ASCII
                # characters; there is no sane conversion, so hand it
                # back unchanged instead of raising.
                return payload
            try:
                payload = bpayload.decode(
                    self.get_param('charset', 'ascii'), 'replace')
            except LookupError:
                payload = bpayload.decode('ascii', 'replace')
        return payload
    # From here on we are decoding; obtain the payload as bytes.
    if isinstance(payload, str):
        try:
            bpayload = payload.encode('ascii', 'surrogateescape')
        except UnicodeEncodeError:
            # This won't happen for RFC compliant messages (messages
            # containing only ASCII codepoints in the unicode input).
            # If it does happen, turn the string into bytes in a way
            # guaranteed not to fail.
            bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, bytes):
        # The payload may already be bytes; decode it directly.
        bpayload = payload
    else:
        # Nothing that can be decoded (e.g. no payload at all).
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    if cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    return bpayload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000297
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A string payload given without a charset is stored untouched.  A
    string payload given with a charset is first encoded to that
    charset's output charset.  Any payload that can be decoded (bytes) is
    stored as a string using the ascii codec with the surrogateescape
    error handler; anything else is stored as-is.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        # We should check for ASCII-only here, but we can't do that
        # for backward compatibility reasons.  Fixed in 3.4.
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        payload = payload.decode('ascii', 'surrogateescape')
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
319
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        # An explicit transfer encoding is already present; leave it alone.
        return
    cte = charset.get_body_encoding()
    try:
        # When cte is callable it is applied to the message itself.
        cte(self)
    except TypeError:
        # Calling cte failed (presumably it is the encoding's name rather
        # than a function), so encode the body here and record cte in the
        # header.
        # This if is for backward compatibility and will be removed
        # in 3.4 when the ascii check is added to set_payload.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', cte)
365
def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    This is whatever set_charset() last stored; None if nothing was set.
    """
    current = self._charset
    return current
370
371 #
372 # MAPPING INTERFACE (partial)
373 #
def __len__(self):
    """Return the total number of headers, including duplicates."""
    headers = self._headers
    return len(headers)
377
def __getitem__(self, name):
    """Get a header value.

    Return None if the header is missing instead of raising an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
388
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    If the policy reports a maximum count for this header name and that
    many headers of the name are already present, ValueError is raised.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        matching = (k for k, _ in self._headers if k.lower() == wanted)
        # Count matches one at a time, failing as soon as the limit is hit.
        for count, _ in enumerate(matching, start=1):
            if count >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000406
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Header names are matched case-insensitively.  Does not raise an
    exception if the header is missing.
    """
    unwanted = name.lower()
    self._headers = [(k, v) for k, v in self._headers
                     if k.lower() != unwanted]
418
def __contains__(self, name):
    """Return True if a header named name is present (case-insensitive)."""
    wanted = name.lower()
    return any(k.lower() == wanted for k, _ in self._headers)
421
def __iter__(self):
    """Iterate over the header field names, in order, with duplicates."""
    yield from (field for field, _ in self._headers)
425
def keys(self):
    """Return a list of all the message's header field names.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.
    """
    return [field for field, _value in self._headers]
435
def values(self):
    """Return a list of all the message's header values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.

    Each stored value is passed through the policy's header_fetch_parse().
    """
    fetch = self.policy.header_fetch_parse
    result = []
    for field, raw in self._headers:
        result.append(fetch(field, raw))
    return result
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000446
def items(self):
    """Get all the message's header fields and values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.

    Returns a list of (name, value) pairs; each value is passed through
    the policy's header_fetch_parse().
    """
    fetch = self.policy.header_fetch_parse
    pairs = []
    for field, raw in self._headers:
        pairs.append((field, fetch(field, raw)))
    return pairs
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000457
def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The first header whose name matches (ignoring case) is
    the one returned.
    """
    wanted = name.lower()
    hit = next(((k, v) for k, v in self._headers if k.lower() == wanted),
               None)
    if hit is None:
        return failobj
    field, raw = hit
    return self.policy.header_fetch_parse(field, raw)
469
470 #
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code).
473 #
474
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is; the policy is not
    consulted.

    This is an "internal" API, intended only for use by a parser.
    """
    pair = (name, value)
    self._headers.append(pair)
481
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The returned iterator walks a copy of the header list, so headers
    added or removed afterwards do not affect it.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
488
489 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000490 # Additional useful stuff
491 #
492
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    These will be sorted in the order they appeared in the original
    message, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    fetch = self.policy.header_fetch_parse
    found = [fetch(k, v) for k, v in self._headers if k.lower() == wanted]
    return found or failobj
510
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add.  _value is the primary value of the
    header; if it is None only the parameters are used.  Keyword arguments
    can be used to set additional parameters for the header field, with
    underscores converted to dashes.  Normally the parameter will be added
    as key="value" unless value is None, in which case only the key will
    be added.  If a parameter value contains non-ASCII characters it can
    be specified as a three-tuple of (charset, language, value), in which
    case it will be encoded according to RFC2231 rules.  Otherwise it will
    be encoded using the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        pname = key.replace('_', '-')
        pieces.append(pname if val is None else _formatparam(pname, val))
    self[_name] = SEMISPACE.join(pieces)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000540
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  If no matching header was found, a KeyError is
    raised (carrying the lower-cased header name).
    """
    wanted = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == wanted:
            # Keep the existing spelling of the field name.
            self._headers[index] = self.policy.header_store_parse(
                field, _value)
            break
    else:
        raise KeyError(wanted)
555
556 #
557 # Use these three methods instead of the three above.
558 #
559
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = _splitparam(raw)[0].lower()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
583
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type().
    """
    maintype, _slash, _subtype = self.get_content_type().partition('/')
    return maintype
592
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
601
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
610
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    # No validation is done: the value is recorded exactly as given.
    self._default_type = ctype
619
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as (name, value) pairs.

    Like get_params() but preserves the quoting of values.  failobj is
    returned when the header is missing.  A bare attribute (one with no
    `=' sign) gets the empty string as its value.  The collected pairs are
    passed through utils.decode_params() before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for chunk in _parseparam(raw):
        name, equals, val = chunk.partition('=')
        if equals:
            pairs.append((name.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((chunk.strip(), ''))
    return utils.decode_params(pairs)
640
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    preserved = self._get_params_preserve(sentinel, header)
    if preserved is sentinel:
        return failobj
    if not unquote:
        return preserved
    return [(name, _unquotevalue(val)) for name, val in preserved]
662
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for name, val in self._get_params_preserve(failobj, header):
        if name.lower() == wanted:
            return _unquotevalue(val) if unquote else val
    return failobj
696
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    If the resulting header text differs from the current one, the header
    is rewritten: in place (via replace_header()) when replace is true,
    otherwise by deleting it and appending it again at the end of the
    header list.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        current = 'text/plain'
    else:
        current = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already there: rebuild the header text piece by
        # piece, substituting the new value for the old one.
        rebuilt = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
        ctype = rebuilt
    else:
        # Not present (or present with an empty value): append it.
        formatted = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([current, formatted]) if current else formatted

    if ctype != self.get(header):
        if replace:
            self.replace_header(header, ctype)
        else:
            del self[header]
            self[header] = ctype
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000747
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt without the parameter or its value and, if the
    result differs from the current text, deleted and added again.  All
    values will be quoted as necessary unless requote is False.  Optional
    header specifies an alternative to the Content-Type header.  Nothing
    happens if the header is not present.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
769
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the "maintype/subtype" portion of the Content-Type header.

    ``type`` has to contain exactly one slash; anything else raises
    ValueError.  Parameters already present on the header are carried over
    to the rewritten header.  They are quoted by default; pass
    ``requote=False`` to leave the existing header's quoting as it is.

    ``header`` selects a header other than Content-Type.  Whenever it is
    the Content-Type header that gets set, a ``MIME-Version: 1.0`` header
    is written as well, replacing any previous one.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    if header.lower() == 'content-type':
        # Setting the Content-Type always brings a fresh MIME-Version.
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Entry 0 holds the previous type, so only the rest is re-applied.
        for name, value in old_params[1:]:
            self.set_param(name, value, header, requote)
    else:
        self[header] = type
801
def get_filename(self, failobj=None):
    """Return the payload's filename, or ``failobj`` when none is given.

    The Content-Disposition header's `filename' parameter is consulted
    first; when it is absent the Content-Type header's `name' parameter is
    used instead.  The value is returned unquoted and stripped of
    surrounding whitespace.
    """
    sentinel = object()
    raw = self.get_param('filename', sentinel, 'content-disposition')
    if raw is sentinel:
        # No `filename' parameter: fall back to Content-Type's `name'.
        raw = self.get_param('name', sentinel, 'content-type')
        if raw is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(raw).strip()
817
def get_boundary(self, failobj=None):
    """Return the payload's boundary, or ``failobj`` when none is given.

    The value comes from the `boundary' parameter of the Content-Type
    header and is returned unquoted, with trailing whitespace removed.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
830
831 def set_boundary(self, boundary):
832 """Set the boundary parameter in Content-Type to 'boundary'.
833
834 This is subtly different than deleting the Content-Type header and
835 adding a new one with a new boundary parameter via add_header(). The
836 main difference is that using the set_boundary() method preserves the
837 order of the Content-Type header in the original message.
838
839 HeaderParseError is raised if the message has no Content-Type header.
840 """
841 missing = object()
842 params = self._get_params_preserve(missing, 'content-type')
843 if params is missing:
844 # There was no Content-Type header, and we don't know what type
845 # to set it to, so raise an exception.
846 raise errors.HeaderParseError('No Content-Type header found')
847 newparams = []
848 foundp = False
849 for pk, pv in params:
850 if pk.lower() == 'boundary':
851 newparams.append(('boundary', '"%s"' % boundary))
852 foundp = True
853 else:
854 newparams.append((pk, pv))
855 if not foundp:
856 # The original Content-Type header had no boundary attribute.
857 # Tack one on the end. BAW: should we raise an exception
858 # instead???
859 newparams.append(('boundary', '"%s"' % boundary))
860 # Replace the existing Content-Type header with the new value
861 newheaders = []
862 for h, v in self._headers:
863 if h.lower() == 'content-type':
864 parts = []
865 for k, v in newparams:
866 if v == '':
867 parts.append(k)
868 else:
869 parts.append('%s=%s' % (k, v))
R David Murrayc27e5222012-05-25 15:01:48 -0400870 val = SEMISPACE.join(parts)
871 newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000872
873 else:
874 newheaders.append((h, v))
875 self._headers = newheaders
876
def get_content_charset(self, failobj=None):
    """Return the Content-Type header's charset parameter, lower-cased.

    ``failobj`` is returned when there is no Content-Type header, when the
    header has no charset parameter, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        declared = value[0] or 'us-ascii'
        try:
            # LookupError will be raised if the charset isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in the charset.
            value = value[2].encode('raw-unicode-escape').decode(declared)
        except (LookupError, UnicodeError):
            # Decoding failed; fall back to the text as it was given.
            value = value[2]
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    else:
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
906
def get_charsets(self, failobj=None):
    """Return the charsets of this message and all of its subparts.

    The list holds one entry per part visited by walk(), starting with the
    message itself, so a non-multipart message gives a list of length 1.
    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset parameter of its Content-Type header, or ``failobj``
    (None by default) when no charset can be reported.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
924
925 # I.e. def walk(self): ...
926 from email.iterators import walk
R David Murray3da240f2013-10-16 22:48:40 -0400927
928
class MIMEPart(Message):
    """Message subclass with helpers for bodies, attachments and content.

    On top of Message this adds lookup of the displayable body (get_body),
    iteration over attachments and immediate subparts, get_content and
    set_content delegating to a content manager, and helpers that turn the
    part into a multipart or add further parts to it.
    """

    def __init__(self, policy=None):
        """Create the part; email.policy.default is used when policy is None."""
        if policy is None:
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True when the Content-Disposition of this part is 'attachment'.

        Only the disposition itself is compared, case-insensitively.  Any
        parameters that follow it (``attachment; filename="x"``) are
        ignored.  False is returned when the header is missing.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        # The header may be a plain string or a header object; work on its
        # text and keep just what precedes the first parameter.
        disposition = str(c_d).partition(';')[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Generate (priority, part) pairs for the body candidates in part.

        priority is the index within preferencelist of the entry that the
        candidate matched.  Parts marked as attachments are skipped.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart' or not part.is_multipart():
            # Either not a container, or a malformed one whose payload is
            # not a list of subparts: there is nothing to descend into.
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # The root of a multipart/related is the subpart whose Content-ID
        # equals the 'start' parameter; failing that, the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the first preference; stop looking.
                    break
        return body

    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.

        The message's payload is never modified.  A part that declares a
        multipart type but whose payload is not a list of subparts gives
        an empty iterator.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # Malformed message: multipart type without a list of subparts.
            return
        # Work on a snapshot so the message's own payload is left alone.
        parts = list(payload)
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slicing also copes with an empty list of subparts.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = set()   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.add(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and also for a part
        that declares a multipart type but whose payload is not a list.
        """
        if self.get_content_maintype() == 'multipart' and self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return what the content manager's get_content gives for this part.

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through to it unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Hand this part to the content manager's set_content.

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through to it unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a 'multipart/<subtype>' container.

        ValueError is raised if the part already is a multipart whose
        subtype is ``subtype`` itself or one of ``disallowed_subtypes``.
        Existing Content-* headers move, together with the payload, into a
        new first subpart; all other headers stay on this part.  A boundary
        parameter is set only when ``boundary`` is not None.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert this part to multipart/related.

        Raises ValueError if it is already a multipart/related,
        multipart/alternative or multipart/mixed.
        """
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert this part to multipart/alternative.

        Raises ValueError if it is already a multipart/alternative or
        multipart/mixed.
        """
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert this part to multipart/mixed.

        Raises ValueError if it is already a multipart/mixed.
        """
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/_subtype if needed.

        The new part is of the same class as this one and is filled by its
        set_content(*args, **kw).  When ``_disp`` is given and the new part
        has no Content-Disposition header yet, the header is set to it.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a multipart/related subpart, 'inline' unless it says otherwise."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a multipart/alternative subpart built from the arguments."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a multipart/mixed subpart, 'attachment' unless it says otherwise."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove every header and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and all headers whose name starts with Content-."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1124
1125
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content also makes sure MIME-Version is present."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then add MIME-Version if absent.

        A ``MIME-Version: 1.0`` header is added only when the message does
        not have a MIME-Version header already.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'