blob: 9d295fc3b4196014164038dc67dd0a47a871eaec [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011from io import BytesIO, StringIO
12
13# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014from email import utils
15from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040016from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000017from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040018from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000019Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
# Separator written between a header's main value and each of its parameters.
SEMISPACE = '; '

# Regular expression that matches the `special' characters in parameter
# values; the presence of any of them forces the parameter value to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000028def _splitparam(param):
29 # Split header parameters. BAW: this may be too simple. It isn't
30 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murraya2150232011-03-16 21:11:23 -040031 # found in the wild. We may eventually need a full fledged parser.
32 # RDM: we might have a Header here; for now just stringify it.
33 a, sep, b = str(param).partition(';')
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000034 if not sep:
35 return a.strip(), None
36 return a.strip(), b.strip()
37
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038def _formatparam(param, value=None, quote=True):
39 """Convenience function to format and return a key=value pair.
40
R. David Murray7ec754b2010-12-13 23:51:19 +000041 This will quote the value if needed or if quote is true. If value is a
42 three tuple (charset, language, value), it will be encoded according
43 to RFC2231 rules. If it contains non-ascii characters it will likewise
44 be encoded according to RFC2231 rules, using the utf-8 charset and
45 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000046 """
47 if value is not None and len(value) > 0:
48 # A tuple is used for RFC 2231 encoded parameter values where items
49 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000050 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000051 if isinstance(value, tuple):
52 # Encode as per RFC 2231
53 param += '*'
54 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000055 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000056 else:
57 try:
58 value.encode('ascii')
59 except UnicodeEncodeError:
60 param += '*'
61 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000062 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000063 # BAW: Please check this. I think that if quote is set it should
64 # force quoting even if not necessary.
65 if quote or tspecials.search(value):
66 return '%s="%s"' % (param, utils.quote(value))
67 else:
68 return '%s=%s' % (param, value)
69 else:
70 return param
71
72def _parseparam(s):
R David Murraya2150232011-03-16 21:11:23 -040073 # RDM This might be a Header, so for now stringify it.
74 s = ';' + str(s)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000075 plist = []
76 while s[:1] == ';':
77 s = s[1:]
78 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000079 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000080 end = s.find(';', end + 1)
81 if end < 0:
82 end = len(s)
83 f = s[:end]
84 if '=' in f:
85 i = f.index('=')
86 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
87 plist.append(f.strip())
88 s = s[end:]
89 return plist
90
91
92def _unquotevalue(value):
93 # This is different than utils.collapse_rfc2231_value() because it doesn't
94 # try to convert the value to a unicode. Message.get_param() and
95 # Message.get_params() are both currently defined to return the tuple in
96 # the face of RFC 2231 parameters.
97 if isinstance(value, tuple):
98 return value[0], value[1], utils.unquote(value[2])
99 else:
100 return utils.unquote(value)
101
102
103
104class Message:
105 """Basic message object.
106
107 A message object is defined as something that has a bunch of RFC 2822
108 headers and a payload. It may optionally have an envelope header
109 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
110 multipart or a message/rfc822), then the payload is a list of Message
111 objects, otherwise it is a string.
112
113 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000114 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115 do in fact appear multiple times (e.g. Received) and for those headers,
116 you must use the explicit API to set or get all the headers. Not all of
117 the mapping methods are implemented.
118 """
R David Murrayc27e5222012-05-25 15:01:48 -0400119 def __init__(self, policy=compat32):
120 self.policy = policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000121 self._headers = []
122 self._unixfrom = None
123 self._payload = None
124 self._charset = None
125 # Defaults for multipart messages
126 self.preamble = self.epilogue = None
127 self.defects = []
128 # Default content type
129 self._default_type = 'text/plain'
130
131 def __str__(self):
132 """Return the entire formatted message as a string.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000133 """
134 return self.as_string()
135
R David Murraybb17d2b2013-08-09 16:15:28 -0400136 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000137 """Return the entire formatted message as a string.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000138
R David Murraybb17d2b2013-08-09 16:15:28 -0400139 Optional 'unixfrom', when true, means include the Unix From_ envelope
140 header. For backward compatibility reasons, if maxheaderlen is
141 not specified it defaults to 0, so you must override it explicitly
142 if you want a different maxheaderlen. 'policy' is passed to the
143 Generator instance used to serialize the mesasge; if it is not
144 specified the policy associated with the message instance is used.
145
146 If the message object contains binary data that is not encoded
147 according to RFC standards, the non-compliant data will be replaced by
148 unicode "unknown character" code points.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000149 """
150 from email.generator import Generator
R David Murraybb17d2b2013-08-09 16:15:28 -0400151 policy = self.policy if policy is None else policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000152 fp = StringIO()
R David Murraybb17d2b2013-08-09 16:15:28 -0400153 g = Generator(fp,
154 mangle_from_=False,
155 maxheaderlen=maxheaderlen,
156 policy=policy)
157 g.flatten(self, unixfrom=unixfrom)
158 return fp.getvalue()
159
160 def __bytes__(self):
161 """Return the entire formatted message as a bytes object.
162 """
163 return self.as_bytes()
164
165 def as_bytes(self, unixfrom=False, policy=None):
166 """Return the entire formatted message as a bytes object.
167
168 Optional 'unixfrom', when true, means include the Unix From_ envelope
169 header. 'policy' is passed to the BytesGenerator instance used to
170 serialize the message; if not specified the policy associated with
171 the message instance is used.
172 """
173 from email.generator import BytesGenerator
174 policy = self.policy if policy is None else policy
175 fp = BytesIO()
176 g = BytesGenerator(fp, mangle_from_=False, policy=policy)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000177 g.flatten(self, unixfrom=unixfrom)
178 return fp.getvalue()
179
180 def is_multipart(self):
181 """Return True if the message consists of multiple parts."""
182 return isinstance(self._payload, list)
183
184 #
185 # Unix From_ line
186 #
187 def set_unixfrom(self, unixfrom):
188 self._unixfrom = unixfrom
189
190 def get_unixfrom(self):
191 return self._unixfrom
192
193 #
194 # Payload manipulation.
195 #
196 def attach(self, payload):
197 """Add the given payload to the current payload.
198
199 The current payload will always be a list of objects after this method
200 is called. If you want to set the payload to a scalar object, use
201 set_payload() instead.
202 """
203 if self._payload is None:
204 self._payload = [payload]
205 else:
206 self._payload.append(payload)
207
208 def get_payload(self, i=None, decode=False):
209 """Return a reference to the payload.
210
211 The payload will either be a list object or a string. If you mutate
212 the list object, you modify the message's payload in place. Optional
213 i returns that index into the payload.
214
215 Optional decode is a flag indicating whether the payload should be
216 decoded or not, according to the Content-Transfer-Encoding header
217 (default is False).
218
219 When True and the message is not a multipart, the payload will be
220 decoded if this header's value is `quoted-printable' or `base64'. If
221 some other encoding is used, or the header is missing, or if the
222 payload has bogus data (i.e. bogus base64 or uuencoded data), the
223 payload is returned as-is.
224
225 If the message is a multipart and the decode flag is True, then None
226 is returned.
227 """
R. David Murray96fd54e2010-10-08 15:55:28 +0000228 # Here is the logic table for this code, based on the email5.0.0 code:
229 # i decode is_multipart result
230 # ------ ------ ------------ ------------------------------
231 # None True True None
232 # i True True None
233 # None False True _payload (a list)
234 # i False True _payload element i (a Message)
235 # i False False error (not a list)
236 # i True False error (not a list)
237 # None False False _payload
238 # None True False _payload decoded (bytes)
239 # Note that Barry planned to factor out the 'decode' case, but that
240 # isn't so easy now that we handle the 8 bit data, which needs to be
241 # converted in both the decode and non-decode path.
242 if self.is_multipart():
243 if decode:
244 return None
245 if i is None:
246 return self._payload
247 else:
248 return self._payload[i]
249 # For backward compatibility, Use isinstance and this error message
250 # instead of the more logical is_multipart test.
251 if i is not None and not isinstance(self._payload, list):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000252 raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray96fd54e2010-10-08 15:55:28 +0000253 payload = self._payload
R David Murraya2150232011-03-16 21:11:23 -0400254 # cte might be a Header, so for now stringify it.
255 cte = str(self.get('content-transfer-encoding', '')).lower()
R David Murray106f8e32011-03-15 12:48:41 -0400256 # payload may be bytes here.
R. David Murray96fd54e2010-10-08 15:55:28 +0000257 if isinstance(payload, str):
R David Murrayc27e5222012-05-25 15:01:48 -0400258 if utils._has_surrogates(payload):
R. David Murray96fd54e2010-10-08 15:55:28 +0000259 bpayload = payload.encode('ascii', 'surrogateescape')
260 if not decode:
261 try:
262 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
263 except LookupError:
264 payload = bpayload.decode('ascii', 'replace')
265 elif decode:
266 try:
267 bpayload = payload.encode('ascii')
268 except UnicodeError:
269 # This won't happen for RFC compliant messages (messages
270 # containing only ASCII codepoints in the unicode input).
271 # If it does happen, turn the string into bytes in a way
272 # guaranteed not to fail.
273 bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000274 if not decode:
275 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000276 if cte == 'quoted-printable':
R. David Murray96fd54e2010-10-08 15:55:28 +0000277 return utils._qdecode(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000278 elif cte == 'base64':
R David Murray80e0aee2012-05-27 21:23:34 -0400279 # XXX: this is a bit of a hack; decode_b should probably be factored
280 # out somewhere, but I haven't figured out where yet.
281 value, defects = decode_b(b''.join(bpayload.splitlines()))
282 for defect in defects:
283 self.policy.handle_defect(self, defect)
284 return value
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000285 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray96fd54e2010-10-08 15:55:28 +0000286 in_file = BytesIO(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000287 out_file = BytesIO()
288 try:
289 uu.decode(in_file, out_file, quiet=True)
290 return out_file.getvalue()
291 except uu.Error:
292 # Some decoding problem
R. David Murray96fd54e2010-10-08 15:55:28 +0000293 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000294 if isinstance(payload, str):
R. David Murray96fd54e2010-10-08 15:55:28 +0000295 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000296 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000297
298 def set_payload(self, payload, charset=None):
299 """Set the payload to the given value.
300
301 Optional charset sets the message's default character set. See
302 set_charset() for details.
303 """
R David Murray50bfbb92013-12-11 16:52:11 -0500304 if hasattr(payload, 'encode'):
305 if charset is None:
306 try:
307 payload.encode('ascii', 'surrogateescape')
308 except UnicodeError:
309 raise TypeError("charset argument must be specified"
310 " when non-ASCII characters are used in the"
311 " payload") from None
312 self._payload = payload
313 return
314 if not isinstance(charset, Charset):
315 charset = Charset(charset)
316 payload = payload.encode(charset.output_charset)
317 if hasattr(payload, 'decode'):
318 self._payload = payload.decode('ascii', 'surrogateescape')
319 else:
320 self._payload = payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000321 if charset is not None:
322 self.set_charset(charset)
323
324 def set_charset(self, charset):
325 """Set the charset of the payload to a given character set.
326
327 charset can be a Charset instance, a string naming a character set, or
328 None. If it is a string it will be converted to a Charset instance.
329 If charset is None, the charset parameter will be removed from the
330 Content-Type field. Anything else will generate a TypeError.
331
332 The message will be assumed to be of type text/* encoded with
333 charset.input_charset. It will be converted to charset.output_charset
334 and encoded properly, if needed, when generating the plain text
335 representation of the message. MIME headers (MIME-Version,
336 Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000337 """
338 if charset is None:
339 self.del_param('charset')
340 self._charset = None
341 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000342 if not isinstance(charset, Charset):
343 charset = Charset(charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000344 self._charset = charset
345 if 'MIME-Version' not in self:
346 self.add_header('MIME-Version', '1.0')
347 if 'Content-Type' not in self:
348 self.add_header('Content-Type', 'text/plain',
349 charset=charset.get_output_charset())
350 else:
351 self.set_param('charset', charset.get_output_charset())
Guido van Rossum9604e662007-08-30 03:46:43 +0000352 if charset != charset.get_output_charset():
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000353 self._payload = charset.body_encode(self._payload)
354 if 'Content-Transfer-Encoding' not in self:
355 cte = charset.get_body_encoding()
356 try:
357 cte(self)
358 except TypeError:
R David Murray50bfbb92013-12-11 16:52:11 -0500359 self._payload = charset.body_encode(self.get_payload(decode=True))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000360 self.add_header('Content-Transfer-Encoding', cte)
361
362 def get_charset(self):
363 """Return the Charset instance associated with the message's payload.
364 """
365 return self._charset
366
367 #
368 # MAPPING INTERFACE (partial)
369 #
370 def __len__(self):
371 """Return the total number of headers, including duplicates."""
372 return len(self._headers)
373
374 def __getitem__(self, name):
375 """Get a header value.
376
377 Return None if the header is missing instead of raising an exception.
378
379 Note that if the header appeared multiple times, exactly which
R. David Murrayd2c310f2010-10-01 02:08:02 +0000380 occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000381 the values matching a header field name.
382 """
383 return self.get(name)
384
385 def __setitem__(self, name, val):
386 """Set the value of a header.
387
388 Note: this does not overwrite an existing header with the same field
389 name. Use __delitem__() first to delete any existing headers.
390 """
R David Murrayabfc3742012-05-29 09:14:44 -0400391 max_count = self.policy.header_max_count(name)
392 if max_count:
393 lname = name.lower()
394 found = 0
395 for k, v in self._headers:
396 if k.lower() == lname:
397 found += 1
398 if found >= max_count:
399 raise ValueError("There may be at most {} {} headers "
400 "in a message".format(max_count, name))
R David Murrayc27e5222012-05-25 15:01:48 -0400401 self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000402
403 def __delitem__(self, name):
404 """Delete all occurrences of a header, if present.
405
406 Does not raise an exception if the header is missing.
407 """
408 name = name.lower()
409 newheaders = []
410 for k, v in self._headers:
411 if k.lower() != name:
412 newheaders.append((k, v))
413 self._headers = newheaders
414
415 def __contains__(self, name):
416 return name.lower() in [k.lower() for k, v in self._headers]
417
418 def __iter__(self):
419 for field, value in self._headers:
420 yield field
421
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000422 def keys(self):
423 """Return a list of all the message's header field names.
424
425 These will be sorted in the order they appeared in the original
426 message, or were added to the message, and may contain duplicates.
427 Any fields deleted and re-inserted are always appended to the header
428 list.
429 """
430 return [k for k, v in self._headers]
431
432 def values(self):
433 """Return a list of all the message's header values.
434
435 These will be sorted in the order they appeared in the original
436 message, or were added to the message, and may contain duplicates.
437 Any fields deleted and re-inserted are always appended to the header
438 list.
439 """
R David Murrayc27e5222012-05-25 15:01:48 -0400440 return [self.policy.header_fetch_parse(k, v)
441 for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000442
443 def items(self):
444 """Get all the message's header fields and values.
445
446 These will be sorted in the order they appeared in the original
447 message, or were added to the message, and may contain duplicates.
448 Any fields deleted and re-inserted are always appended to the header
449 list.
450 """
R David Murrayc27e5222012-05-25 15:01:48 -0400451 return [(k, self.policy.header_fetch_parse(k, v))
452 for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000453
454 def get(self, name, failobj=None):
455 """Get a header value.
456
457 Like __getitem__() but return failobj instead of None when the field
458 is missing.
459 """
460 name = name.lower()
461 for k, v in self._headers:
462 if k.lower() == name:
R David Murrayc27e5222012-05-25 15:01:48 -0400463 return self.policy.header_fetch_parse(k, v)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000464 return failobj
465
466 #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
    #
470
471 def set_raw(self, name, value):
472 """Store name and value in the model without modification.
473
474 This is an "internal" API, intended only for use by a parser.
475 """
476 self._headers.append((name, value))
477
478 def raw_items(self):
479 """Return the (name, value) header pairs without modification.
480
481 This is an "internal" API, intended only for use by a generator.
482 """
483 return iter(self._headers.copy())
484
485 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000486 # Additional useful stuff
487 #
488
489 def get_all(self, name, failobj=None):
490 """Return a list of all the values for the named field.
491
492 These will be sorted in the order they appeared in the original
493 message, and may contain duplicates. Any fields deleted and
494 re-inserted are always appended to the header list.
495
496 If no such fields exist, failobj is returned (defaults to None).
497 """
498 values = []
499 name = name.lower()
500 for k, v in self._headers:
501 if k.lower() == name:
R David Murrayc27e5222012-05-25 15:01:48 -0400502 values.append(self.policy.header_fetch_parse(k, v))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000503 if not values:
504 return failobj
505 return values
506
507 def add_header(self, _name, _value, **_params):
508 """Extended header setting.
509
510 name is the header field to add. keyword arguments can be used to set
511 additional parameters for the header field, with underscores converted
512 to dashes. Normally the parameter will be added as key="value" unless
R. David Murray7ec754b2010-12-13 23:51:19 +0000513 value is None, in which case only the key will be added. If a
514 parameter value contains non-ASCII characters it can be specified as a
515 three-tuple of (charset, language, value), in which case it will be
516 encoded according to RFC2231 rules. Otherwise it will be encoded using
517 the utf-8 charset and a language of ''.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000518
R. David Murray7ec754b2010-12-13 23:51:19 +0000519 Examples:
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000520
521 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
R. David Murray7ec754b2010-12-13 23:51:19 +0000522 msg.add_header('content-disposition', 'attachment',
523 filename=('utf-8', '', Fußballer.ppt'))
524 msg.add_header('content-disposition', 'attachment',
525 filename='Fußballer.ppt'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000526 """
527 parts = []
528 for k, v in _params.items():
529 if v is None:
530 parts.append(k.replace('_', '-'))
531 else:
532 parts.append(_formatparam(k.replace('_', '-'), v))
533 if _value is not None:
534 parts.insert(0, _value)
R David Murrayc27e5222012-05-25 15:01:48 -0400535 self[_name] = SEMISPACE.join(parts)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000536
537 def replace_header(self, _name, _value):
538 """Replace a header.
539
540 Replace the first matching header found in the message, retaining
541 header order and case. If no matching header was found, a KeyError is
542 raised.
543 """
544 _name = _name.lower()
545 for i, (k, v) in zip(range(len(self._headers)), self._headers):
546 if k.lower() == _name:
R David Murrayc27e5222012-05-25 15:01:48 -0400547 self._headers[i] = self.policy.header_store_parse(k, _value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000548 break
549 else:
550 raise KeyError(_name)
551
552 #
553 # Use these three methods instead of the three above.
554 #
555
556 def get_content_type(self):
557 """Return the message's content type.
558
559 The returned string is coerced to lower case of the form
560 `maintype/subtype'. If there was no Content-Type header in the
561 message, the default type as given by get_default_type() will be
562 returned. Since according to RFC 2045, messages always have a default
563 type this will always return a value.
564
565 RFC 2045 defines a message's default type to be text/plain unless it
566 appears inside a multipart/digest container, in which case it would be
567 message/rfc822.
568 """
569 missing = object()
570 value = self.get('content-type', missing)
571 if value is missing:
572 # This should have no parameters
573 return self.get_default_type()
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000574 ctype = _splitparam(value)[0].lower()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000575 # RFC 2045, section 5.2 says if its invalid, use text/plain
576 if ctype.count('/') != 1:
577 return 'text/plain'
578 return ctype
579
580 def get_content_maintype(self):
581 """Return the message's main content type.
582
583 This is the `maintype' part of the string returned by
584 get_content_type().
585 """
586 ctype = self.get_content_type()
587 return ctype.split('/')[0]
588
589 def get_content_subtype(self):
590 """Returns the message's sub-content type.
591
592 This is the `subtype' part of the string returned by
593 get_content_type().
594 """
595 ctype = self.get_content_type()
596 return ctype.split('/')[1]
597
598 def get_default_type(self):
599 """Return the `default' content type.
600
601 Most messages have a default content type of text/plain, except for
602 messages that are subparts of multipart/digest containers. Such
603 subparts have a default content type of message/rfc822.
604 """
605 return self._default_type
606
607 def set_default_type(self, ctype):
608 """Set the `default' content type.
609
610 ctype should be either "text/plain" or "message/rfc822", although this
611 is not enforced. The default content type is not stored in the
612 Content-Type header.
613 """
614 self._default_type = ctype
615
616 def _get_params_preserve(self, failobj, header):
617 # Like get_params() but preserves the quoting of values. BAW:
618 # should this be part of the public interface?
619 missing = object()
620 value = self.get(header, missing)
621 if value is missing:
622 return failobj
623 params = []
R David Murraya2150232011-03-16 21:11:23 -0400624 for p in _parseparam(value):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000625 try:
626 name, val = p.split('=', 1)
627 name = name.strip()
628 val = val.strip()
629 except ValueError:
630 # Must have been a bare attribute
631 name = p.strip()
632 val = ''
633 params.append((name, val))
634 params = utils.decode_params(params)
635 return params
636
637 def get_params(self, failobj=None, header='content-type', unquote=True):
638 """Return the message's Content-Type parameters, as a list.
639
640 The elements of the returned list are 2-tuples of key/value pairs, as
641 split on the `=' sign. The left hand side of the `=' is the key,
642 while the right hand side is the value. If there is no `=' sign in
643 the parameter the value is the empty string. The value is as
644 described in the get_param() method.
645
646 Optional failobj is the object to return if there is no Content-Type
647 header. Optional header is the header to search instead of
648 Content-Type. If unquote is True, the value is unquoted.
649 """
650 missing = object()
651 params = self._get_params_preserve(missing, header)
652 if params is missing:
653 return failobj
654 if unquote:
655 return [(k, _unquotevalue(v)) for k, v in params]
656 else:
657 return params
658
659 def get_param(self, param, failobj=None, header='content-type',
660 unquote=True):
661 """Return the parameter value if found in the Content-Type header.
662
663 Optional failobj is the object to return if there is no Content-Type
664 header, or the Content-Type header has no such parameter. Optional
665 header is the header to search instead of Content-Type.
666
667 Parameter keys are always compared case insensitively. The return
668 value can either be a string, or a 3-tuple if the parameter was RFC
669 2231 encoded. When it's a 3-tuple, the elements of the value are of
670 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
671 LANGUAGE can be None, in which case you should consider VALUE to be
672 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
R David Murray3ac8c782012-06-17 15:26:35 -0400673 The parameter value (either the returned string, or the VALUE item in
674 the 3-tuple) is always unquoted, unless unquote is set to False.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000675
R David Murray3ac8c782012-06-17 15:26:35 -0400676 If your application doesn't care whether the parameter was RFC 2231
677 encoded, it can turn the return value into a string as follows:
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000678
R David Murray0de4d3e2013-11-03 12:23:23 -0500679 rawparam = msg.get_param('foo')
R David Murray3ac8c782012-06-17 15:26:35 -0400680 param = email.utils.collapse_rfc2231_value(rawparam)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000682 """
683 if header not in self:
684 return failobj
685 for k, v in self._get_params_preserve(failobj, header):
686 if k.lower() == param.lower():
687 if unquote:
688 return _unquotevalue(v)
689 else:
690 return v
691 return failobj
692
693 def set_param(self, param, value, header='Content-Type', requote=True,
R David Murray3da240f2013-10-16 22:48:40 -0400694 charset=None, language='', replace=False):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000695 """Set a parameter in the Content-Type header.
696
697 If the parameter already exists in the header, its value will be
698 replaced with the new value.
699
700 If header is Content-Type and has not yet been defined for this
701 message, it will be set to "text/plain" and the new parameter and
702 value will be appended as per RFC 2045.
703
704 An alternate header can specified in the header argument, and all
705 parameters will be quoted as necessary unless requote is False.
706
707 If charset is specified, the parameter will be encoded according to RFC
708 2231. Optional language specifies the RFC 2231 language, defaulting
709 to the empty string. Both charset and language should be strings.
710 """
711 if not isinstance(value, tuple) and charset:
712 value = (charset, language, value)
713
714 if header not in self and header.lower() == 'content-type':
715 ctype = 'text/plain'
716 else:
717 ctype = self.get(header)
718 if not self.get_param(param, header=header):
719 if not ctype:
720 ctype = _formatparam(param, value, requote)
721 else:
722 ctype = SEMISPACE.join(
723 [ctype, _formatparam(param, value, requote)])
724 else:
725 ctype = ''
726 for old_param, old_value in self.get_params(header=header,
727 unquote=requote):
728 append_param = ''
729 if old_param.lower() == param.lower():
730 append_param = _formatparam(param, value, requote)
731 else:
732 append_param = _formatparam(old_param, old_value, requote)
733 if not ctype:
734 ctype = append_param
735 else:
736 ctype = SEMISPACE.join([ctype, append_param])
737 if ctype != self.get(header):
R David Murray3da240f2013-10-16 22:48:40 -0400738 if replace:
739 self.replace_header(header, ctype)
740 else:
741 del self[header]
742 self[header] = ctype
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000743
def del_param(self, param, header='content-type', requote=True):
    """Drop one parameter (and its value) from a parameterized header.

    The header value is rebuilt from every parameter whose name does not
    match param (names are compared case-insensitively).  Remaining values
    are quoted as necessary unless requote is False.  The header operated
    on is Content-Type unless another one is named in the header argument.

    Nothing happens if the header is absent, and the header is only
    deleted and re-added when the rebuilt value differs from the current
    one.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        # The first surviving piece starts the value; later ones are joined.
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
765
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the maintype/subtype of the Content-Type header.

    type has to look like "maintype/subtype" (exactly one slash); anything
    else raises ValueError.

    Existing parameters of the header are carried over to the new value.
    With requote set to False the quoting found in the existing header is
    left alone; otherwise (the default) the parameters are quoted.

    A different header may be named through the header argument.  Whenever
    the header being set is Content-Type, the MIME-Version header is reset
    to 1.0 as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    if header.lower() == 'content-type':
        # Set the Content-Type, you get a MIME-Version
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Element 0 is the old type itself; re-apply only what follows it.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
797
def get_filename(self, failobj=None):
    """Return the filename recorded for the payload, or failobj.

    The `filename' parameter of the Content-Disposition header is tried
    first; if it is missing, the `name' parameter of the Content-Type
    header is used instead.  The value found is collapsed from its
    RFC 2231 form and stripped of surrounding whitespace.  failobj is
    returned when neither parameter exists.
    """
    sentinel = object()
    fallbacks = (
        ('filename', 'content-disposition'),
        ('name', 'content-type'),
    )
    for param, header in fallbacks:
        found = self.get_param(param, sentinel, header)
        if found is not sentinel:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj
813
def get_boundary(self, failobj=None):
    """Return the Content-Type `boundary' parameter, or failobj.

    The parameter value is collapsed from its RFC 2231 form and trailing
    whitespace is removed.  failobj is returned when the parameter (or the
    header itself) is missing.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
826
def set_boundary(self, boundary):
    """Set the `boundary' parameter of the Content-Type header.

    Unlike deleting Content-Type and adding a fresh one through
    add_header(), this keeps every header at its current position in the
    header list: only the value of the Content-Type entry is swapped.

    Each existing boundary parameter is replaced with the new (quoted)
    value; if the header has none, one is appended after the other
    parameters.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = [
        ('boundary', quoted) if key.lower() == 'boundary' else (key, val)
        for key, val in params]
    if not any(key.lower() == 'boundary' for key, _ in params):
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # A parameter with an empty value is rendered as its bare name.
    rendered = SEMISPACE.join(
        key if val == '' else '%s=%s' % (key, val)
        for key, val in updated)
    # Rebuild the header list, replacing only Content-Type entries.
    self._headers = [
        self.policy.header_store_parse(name, rendered)
        if name.lower() == 'content-type' else (name, value)
        for name, value in self._headers]
872
def get_content_charset(self, failobj=None):
    """Return the lower-cased `charset' parameter of Content-Type.

    failobj is returned when there is no Content-Type header, when the
    header carries no charset parameter, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        declared, text = value[0] or 'us-ascii', value[2]
        try:
            # LookupError will be raised if the charset isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in the charset.
            value = str(text.encode('raw-unicode-escape'), declared)
        except (LookupError, UnicodeError):
            value = text
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
902
def get_charsets(self, failobj=None):
    """Return the charset of every part reached by walking this message.

    The result has one entry per part produced by self.walk(), which
    includes the message itself, so a non-multipart message gives a list
    of length 1.

    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset parameter of its Content-Type header as a string, or
    failobj (None by default) when no usable charset is declared.
    """
    found = []
    for part in self.walk():
        found.append(part.get_content_charset(failobj))
    return found
920
921 # I.e. def walk(self): ...
922 from email.iterators import walk
R David Murray3da240f2013-10-16 22:48:40 -0400923
924
class MIMEPart(Message):
    """Message subclass with helpers for working with MIME structure.

    The policy defaults to email.policy.default.  On top of Message this
    class can locate the best body part, iterate over attachments and
    immediate subparts, get and set content through the policy's content
    manager, and convert the message into a multipart container.
    """

    def __init__(self, policy=None):
        """Initialize the part, using email.policy.default if policy is None."""
        if policy is None:
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True if the Content-Disposition type is 'attachment'.

        Only the disposition type (the text before the first ';') is
        compared, case-insensitively, so a header that also carries
        parameters, e.g. 'attachment; filename="x"', still counts.  False
        when the header is absent.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        disposition = str(c_d).partition(';')[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates below part.

        priority is the index within preferencelist of the entry the
        candidate matched ('related', or a text subtype).  Attachments are
        never candidates.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Search inside the root of the multipart/related as well: the part
        # whose Content-ID equals the 'start' parameter, else the first one.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the first preference; stop searching.
                    break
        return body

    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.

        The message's payload is never modified by iterating.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        parts = self.get_payload()
        if not isinstance(parts, list):
            # The header claims multipart but the payload holds no subparts.
            return
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0) so the root part stays in the payload.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and also when the
        Content-Type says multipart but the payload is not a list of parts.
        """
        if (self.get_content_maintype() == 'multipart'
                and self.is_multipart()):
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content via content_manager.get_content(self, ...).

        The policy's content_manager is used when none is passed; extra
        positional and keyword arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content via content_manager.set_content(self, ...).

        The policy's content_manager is used when none is passed; extra
        positional and keyword arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this message into a multipart/<subtype> container.

        Existing Content-* headers and the payload move into a new first
        subpart; all other headers stay on this message.  ValueError is
        raised if the message is already a multipart whose subtype is
        subtype itself or one of disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new part, converting to multipart/<_subtype> if needed.

        The new part is filled through set_content(*args, **kw).  If _disp
        is given and the part has no Content-Disposition header afterwards,
        that header is set to _disp.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a part to a multipart/related, defaulting it to inline."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a part to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a part to a multipart/mixed, defaulting it to attachment."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts 'content-'."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1120
1121
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then add MIME-Version if absent.

        All arguments are forwarded to MIMEPart.set_content().  An existing
        MIME-Version header is left untouched.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'