blob: 88b5fa3552404744590fd693949710a44a32958b [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011from io import BytesIO, StringIO
12
13# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014from email import utils
15from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040016from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000017from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040018from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000019Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
# Separator used when joining a header value with its formatted parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000028def _splitparam(param):
29 # Split header parameters. BAW: this may be too simple. It isn't
30 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murraya2150232011-03-16 21:11:23 -040031 # found in the wild. We may eventually need a full fledged parser.
32 # RDM: we might have a Header here; for now just stringify it.
33 a, sep, b = str(param).partition(';')
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000034 if not sep:
35 return a.strip(), None
36 return a.strip(), b.strip()
37
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038def _formatparam(param, value=None, quote=True):
39 """Convenience function to format and return a key=value pair.
40
R. David Murray7ec754b2010-12-13 23:51:19 +000041 This will quote the value if needed or if quote is true. If value is a
42 three tuple (charset, language, value), it will be encoded according
43 to RFC2231 rules. If it contains non-ascii characters it will likewise
44 be encoded according to RFC2231 rules, using the utf-8 charset and
45 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000046 """
47 if value is not None and len(value) > 0:
48 # A tuple is used for RFC 2231 encoded parameter values where items
49 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000050 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000051 if isinstance(value, tuple):
52 # Encode as per RFC 2231
53 param += '*'
54 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000055 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000056 else:
57 try:
58 value.encode('ascii')
59 except UnicodeEncodeError:
60 param += '*'
61 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000062 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000063 # BAW: Please check this. I think that if quote is set it should
64 # force quoting even if not necessary.
65 if quote or tspecials.search(value):
66 return '%s="%s"' % (param, utils.quote(value))
67 else:
68 return '%s=%s' % (param, value)
69 else:
70 return param
71
72def _parseparam(s):
R David Murraya2150232011-03-16 21:11:23 -040073 # RDM This might be a Header, so for now stringify it.
74 s = ';' + str(s)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000075 plist = []
76 while s[:1] == ';':
77 s = s[1:]
78 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000079 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000080 end = s.find(';', end + 1)
81 if end < 0:
82 end = len(s)
83 f = s[:end]
84 if '=' in f:
85 i = f.index('=')
86 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
87 plist.append(f.strip())
88 s = s[end:]
89 return plist
90
91
92def _unquotevalue(value):
93 # This is different than utils.collapse_rfc2231_value() because it doesn't
94 # try to convert the value to a unicode. Message.get_param() and
95 # Message.get_params() are both currently defined to return the tuple in
96 # the face of RFC 2231 parameters.
97 if isinstance(value, tuple):
98 return value[0], value[1], utils.unquote(value[2])
99 else:
100 return utils.unquote(value)
101
102
103
104class Message:
105 """Basic message object.
106
107 A message object is defined as something that has a bunch of RFC 2822
108 headers and a payload. It may optionally have an envelope header
109 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
110 multipart or a message/rfc822), then the payload is a list of Message
111 objects, otherwise it is a string.
112
113 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000114 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115 do in fact appear multiple times (e.g. Received) and for those headers,
116 you must use the explicit API to set or get all the headers. Not all of
117 the mapping methods are implemented.
118 """
R David Murrayc27e5222012-05-25 15:01:48 -0400119 def __init__(self, policy=compat32):
120 self.policy = policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000121 self._headers = []
122 self._unixfrom = None
123 self._payload = None
124 self._charset = None
125 # Defaults for multipart messages
126 self.preamble = self.epilogue = None
127 self.defects = []
128 # Default content type
129 self._default_type = 'text/plain'
130
131 def __str__(self):
132 """Return the entire formatted message as a string.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000133 """
134 return self.as_string()
135
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons maxheaderlen defaults to
    0, so you must override it explicitly if you want a different
    maxheaderlen.  'policy' is passed to the Generator instance used to
    serialize the message; if it is not specified the policy associated
    with the message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
159
160 def __bytes__(self):
161 """Return the entire formatted message as a bytes object.
162 """
163 return self.as_bytes()
164
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is handed to the BytesGenerator doing the
    serialization; when it is None the message's own policy is used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
179
def is_multipart(self):
    """Return True when the payload is a list (of sub-parts)."""
    payload = self._payload
    return isinstance(payload, list)
183
184 #
185 # Unix From_ line
186 #
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line for this message."""
    self._unixfrom = unixfrom
189
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    envelope = self._unixfrom
    return envelope
192
193 #
194 # Payload manipulation.
195 #
def attach(self, payload):
    """Append the given payload to the current payload list.

    When there is no payload yet a new one-element list is created, so the
    payload is always a list after this call.  To set the payload to a
    scalar object use set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
        return
    self._payload = [payload]
207
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names.  If some other encoding is used, or the header
    is missing, or if the payload has bogus uuencoded data, the payload is
    returned as-is (as bytes when it was stored as a string).  A payload
    that is already bytes is decoded directly; a payload that is neither
    str nor bytes (e.g. None) is returned unchanged.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given and the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        else:
            return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here: in that case it already is the encoded
    # byte form.  For any other non-str payload there is nothing to decode,
    # which is signalled by leaving bpayload as None.
    bpayload = payload if isinstance(payload, bytes) else None
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    # Unknown charset name: fall back to ascii.
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII codepoints in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    if not decode or bpayload is None:
        # Either no decoding was requested, or the payload has no byte
        # form to decode (previously this raised UnboundLocalError).
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000297
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    A text payload (anything with an encode method) given without a
    charset is stored untouched.  With a charset it is first encoded to
    the charset's output_charset.  Byte payloads are stored as ascii text
    using the surrogateescape error handler.  When charset is not None,
    set_charset() is called with it afterwards; see that method for
    details.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        payload = payload.decode('ascii', 'surrogateescape')
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
317
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        # Clearing the charset: drop the Content-Type parameter and forget
        # the Charset instance; no other header is touched.
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    # Add the MIME headers that are still missing.
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' not in self:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    else:
        self.set_param('charset', charset.get_output_charset())
    # NOTE(review): this compares the Charset object with the name returned
    # by get_output_charset(); presumably Charset defines the comparison so
    # that it is true only when input and output charsets differ -- confirm.
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    # An existing Content-Transfer-Encoding header is left alone, and the
    # payload is then not encoded here.
    if 'Content-Transfer-Encoding' not in self:
        cte = charset.get_body_encoding()
        try:
            # Try to use cte as a callable that is handed the message.
            cte(self)
        except TypeError:
            # Reached when the call above raises TypeError (presumably
            # because cte is a string naming the encoding rather than a
            # callable -- confirm against Charset.get_body_encoding).
            # This 'if' is for backward compatibility, it allows unicode
            # through even though that won't work correctly if the
            # message is serialized.
            payload = self._payload
            if payload:
                try:
                    payload = payload.encode('ascii', 'surrogateescape')
                except UnicodeError:
                    payload = payload.encode(charset.output_charset)
            self._payload = charset.body_encode(payload)
            self.add_header('Content-Transfer-Encoding', cte)
364
def get_charset(self):
    """Return the Charset instance recorded for the payload, or None.
    """
    current = self._charset
    return current
369
370 #
371 # MAPPING INTERFACE (partial)
372 #
373 def __len__(self):
374 """Return the total number of headers, including duplicates."""
375 return len(self._headers)
376
377 def __getitem__(self, name):
378 """Get a header value.
379
380 Return None if the header is missing instead of raising an exception.
381
382 Note that if the header appeared multiple times, exactly which
R. David Murrayd2c310f2010-10-01 02:08:02 +0000383 occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000384 the values matching a header field name.
385 """
386 return self.get(name)
387
388 def __setitem__(self, name, val):
389 """Set the value of a header.
390
391 Note: this does not overwrite an existing header with the same field
392 name. Use __delitem__() first to delete any existing headers.
393 """
R David Murrayabfc3742012-05-29 09:14:44 -0400394 max_count = self.policy.header_max_count(name)
395 if max_count:
396 lname = name.lower()
397 found = 0
398 for k, v in self._headers:
399 if k.lower() == lname:
400 found += 1
401 if found >= max_count:
402 raise ValueError("There may be at most {} {} headers "
403 "in a message".format(max_count, name))
R David Murrayc27e5222012-05-25 15:01:48 -0400404 self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000405
406 def __delitem__(self, name):
407 """Delete all occurrences of a header, if present.
408
409 Does not raise an exception if the header is missing.
410 """
411 name = name.lower()
412 newheaders = []
413 for k, v in self._headers:
414 if k.lower() != name:
415 newheaders.append((k, v))
416 self._headers = newheaders
417
418 def __contains__(self, name):
419 return name.lower() in [k.lower() for k, v in self._headers]
420
421 def __iter__(self):
422 for field, value in self._headers:
423 yield field
424
def keys(self):
    """Return a list of all the message's header field names.

    The names come in the order the headers are stored: the order they
    appeared in the original message or were added to it.  Duplicates are
    possible, and a field that was deleted and re-inserted shows up at the
    end of the list.
    """
    names = []
    for name, _ in self._headers:
        names.append(name)
    return names
434
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    The values come in the order the headers are stored: the order they
    appeared in the original message or were added to it.  Duplicates are
    possible, and a field that was deleted and re-inserted shows up at the
    end of the list.
    """
    result = []
    for name, raw in self._headers:
        result.append(self.policy.header_fetch_parse(name, raw))
    return result
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000445
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs, each value having been passed
    through the policy's header_fetch_parse().  The pairs come in the
    order the headers are stored: the order they appeared in the original
    message or were added to it.  Duplicates are possible, and a field
    that was deleted and re-inserted shows up at the end of the list.
    """
    pairs = []
    for name, raw in self._headers:
        pairs.append((name, self.policy.header_fetch_parse(name, raw)))
    return pairs
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000456
def get(self, name, failobj=None):
    """Get a header value.

    The first header whose name matches (ignoring case) is returned after
    being passed through the policy's header_fetch_parse().  When no
    header matches, failobj is returned.
    """
    wanted = name.lower()
    for key, raw in self._headers:
        if key.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(key, raw)
    return failobj
468
469 #
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code).
472 #
473
def set_raw(self, name, value):
    """Append the (name, value) pair to the headers exactly as given.

    This is an "internal" API, intended only for use by a parser.
    """
    entry = (name, value)
    self._headers.append(entry)
480
def raw_items(self):
    """Return an iterator over the unmodified (name, value) header pairs.

    The iterator runs over a copy of the header list.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
487
488 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000489 # Additional useful stuff
490 #
491
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Values are listed in the order the matching headers are stored, may
    contain duplicates, and each one has been passed through the policy's
    header_fetch_parse().  Any fields deleted and re-inserted are always
    appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [self.policy.header_fetch_parse(key, raw)
               for key, raw in self._headers
               if key.lower() == wanted]
    return matches if matches else failobj
509
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value.  Keyword
    arguments become additional parameters of the header field, with
    underscores in their names converted to dashes.  A parameter is added
    as key="value" unless its value is None, in which case only the key is
    added.  A parameter value containing non-ASCII characters can be given
    as a three-tuple of (charset, language, value), in which case it is
    encoded according to RFC2231 rules.  Otherwise it is encoded using the
    utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The main value, when there is one, leads the parameter list.
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        label = key.replace('_', '-')
        parts.append(label if val is None else _formatparam(label, val))
    self[_name] = SEMISPACE.join(parts)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000539
def replace_header(self, _name, _value):
    """Replace a header.

    Only the first header whose name matches (ignoring case) is replaced;
    its position and the case of its stored name are kept.  KeyError is
    raised when no header matches.
    """
    wanted = _name.lower()
    for index, (key, _old) in enumerate(self._headers):
        if key.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(key, _value)
            return
    raise KeyError(wanted)
554
555 #
556 # Use these three methods instead of the three above.
557 #
558
def get_content_type(self):
    """Return the message's content type.

    The result is a lower-cased string of the form `maintype/subtype'.
    Without a Content-Type header the default type as given by
    get_default_type() is returned.  Since according to RFC 2045 messages
    always have a default type, this always returns a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
582
def get_content_maintype(self):
    """Return the message's main content type.

    That is the part before the '/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
591
def get_content_subtype(self):
    """Return the message's sub-content type.

    That is the part after the '/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
600
def get_default_type(self):
    """Return the `default' content type.

    This is the type used when the message has no Content-Type header.
    Most messages default to text/plain; subparts of multipart/digest
    containers default to message/rfc822.
    """
    default = self._default_type
    return default
609
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype is expected to be "text/plain" or "message/rfc822", but nothing
    checks that.  The value is kept on the message object only; the
    Content-Type header is not changed.
    """
    self._default_type = ctype
618
619 def _get_params_preserve(self, failobj, header):
620 # Like get_params() but preserves the quoting of values. BAW:
621 # should this be part of the public interface?
622 missing = object()
623 value = self.get(header, missing)
624 if value is missing:
625 return failobj
626 params = []
R David Murraya2150232011-03-16 21:11:23 -0400627 for p in _parseparam(value):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000628 try:
629 name, val = p.split('=', 1)
630 name = name.strip()
631 val = val.strip()
632 except ValueError:
633 # Must have been a bare attribute
634 name = p.strip()
635 val = ''
636 params.append((name, val))
637 params = utils.decode_params(params)
638 return params
639
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    Each element is a 2-tuple (key, value) obtained by splitting a
    parameter on its `=' sign; a parameter without `=' has the empty
    string as value.  The value is as described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(key, _unquotevalue(val)) for key, val in params]
661
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != param.lower():
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
695
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    If replace is true and the header text changes, the header is updated
    in place with replace_header().  Otherwise the old header is deleted
    and the new one is added again, which puts it at the end of the
    header list.
    """
    # A plain value plus a charset becomes an RFC 2231 three-tuple; a value
    # that already is a tuple is used as given.
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # NOTE: this is a truthiness test, so a parameter that exists but has
    # an empty value is handled like a missing one (the new parameter is
    # appended rather than replacing it).
    if not self.get_param(param, header=header):
        if not ctype:
            ctype = _formatparam(param, value, requote)
        else:
            ctype = SEMISPACE.join(
                [ctype, _formatparam(param, value, requote)])
    else:
        # Rebuild the whole header text, substituting the new value for the
        # matching parameter and re-formatting all the others.
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            append_param = ''
            if old_param.lower() == param.lower():
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            if not ctype:
                ctype = append_param
            else:
                ctype = SEMISPACE.join([ctype, append_param])
    # Only touch the stored headers when the text actually changed.
    if ctype != self.get(header):
        if replace:
            self.replace_header(header, ctype)
        else:
            del self[header]
            self[header] = ctype
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000746
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header text is rebuilt without the parameter or its value; if that
    changes it, the old header is deleted and the new one is added.  All
    values will be quoted as necessary unless requote is False.  Optional
    header specifies an alternative to the Content-Type header.  Nothing
    happens when the header is absent.
    """
    if header not in self:
        return
    pieces = []
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        # Empty pieces are dropped for as long as nothing has been kept.
        if pieces or piece:
            pieces.append(piece)
    new_ctype = SEMISPACE.join(pieces)
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
768
769 def set_type(self, type, header='Content-Type', requote=True):
770 """Set the main type and subtype for the Content-Type header.
771
772 type must be a string in the form "maintype/subtype", otherwise a
773 ValueError is raised.
774
775 This method replaces the Content-Type header, keeping all the
776 parameters in place. If requote is False, this leaves the existing
777 header's quoting as is. Otherwise, the parameters will be quoted (the
778 default).
779
780 An alternative header can be specified in the header argument. When
781 the Content-Type header is set, we'll always also add a MIME-Version
782 header.
783 """
784 # BAW: should we be strict?
785 if not type.count('/') == 1:
786 raise ValueError
787 # Set the Content-Type, you get a MIME-Version
788 if header.lower() == 'content-type':
789 del self['mime-version']
790 self['MIME-Version'] = '1.0'
791 if header not in self:
792 self[header] = type
793 return
794 params = self.get_params(header=header, unquote=requote)
795 del self[header]
796 self[header] = type
797 # Skip the first param; it's the old type.
798 for p, v in params[1:]:
799 self.set_param(p, v, header, requote)
800
801 def get_filename(self, failobj=None):
802 """Return the filename associated with the payload if present.
803
804 The filename is extracted from the Content-Disposition header's
805 `filename' parameter, and it is unquoted. If that header is missing
806 the `filename' parameter, this method falls back to looking for the
807 `name' parameter.
808 """
809 missing = object()
810 filename = self.get_param('filename', missing, 'content-disposition')
811 if filename is missing:
R. David Murraybf2e0aa2009-10-10 00:13:32 +0000812 filename = self.get_param('name', missing, 'content-type')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000813 if filename is missing:
814 return failobj
815 return utils.collapse_rfc2231_value(filename).strip()
816
817 def get_boundary(self, failobj=None):
818 """Return the boundary associated with the payload if present.
819
820 The boundary is extracted from the Content-Type header's `boundary'
821 parameter, and it is unquoted.
822 """
823 missing = object()
824 boundary = self.get_param('boundary', missing)
825 if boundary is missing:
826 return failobj
827 # RFC 2046 says that boundaries may begin but not end in w/s
828 return utils.collapse_rfc2231_value(boundary).rstrip()
829
830 def set_boundary(self, boundary):
831 """Set the boundary parameter in Content-Type to 'boundary'.
832
833 This is subtly different than deleting the Content-Type header and
834 adding a new one with a new boundary parameter via add_header(). The
835 main difference is that using the set_boundary() method preserves the
836 order of the Content-Type header in the original message.
837
838 HeaderParseError is raised if the message has no Content-Type header.
839 """
840 missing = object()
841 params = self._get_params_preserve(missing, 'content-type')
842 if params is missing:
843 # There was no Content-Type header, and we don't know what type
844 # to set it to, so raise an exception.
845 raise errors.HeaderParseError('No Content-Type header found')
846 newparams = []
847 foundp = False
848 for pk, pv in params:
849 if pk.lower() == 'boundary':
850 newparams.append(('boundary', '"%s"' % boundary))
851 foundp = True
852 else:
853 newparams.append((pk, pv))
854 if not foundp:
855 # The original Content-Type header had no boundary attribute.
856 # Tack one on the end. BAW: should we raise an exception
857 # instead???
858 newparams.append(('boundary', '"%s"' % boundary))
859 # Replace the existing Content-Type header with the new value
860 newheaders = []
861 for h, v in self._headers:
862 if h.lower() == 'content-type':
863 parts = []
864 for k, v in newparams:
865 if v == '':
866 parts.append(k)
867 else:
868 parts.append('%s=%s' % (k, v))
R David Murrayc27e5222012-05-25 15:01:48 -0400869 val = SEMISPACE.join(parts)
870 newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000871
872 else:
873 newheaders.append((h, v))
874 self._headers = newheaders
875
876 def get_content_charset(self, failobj=None):
877 """Return the charset parameter of the Content-Type header.
878
879 The returned string is always coerced to lower case. If there is no
880 Content-Type header, or if that header has no charset parameter,
881 failobj is returned.
882 """
883 missing = object()
884 charset = self.get_param('charset', missing)
885 if charset is missing:
886 return failobj
887 if isinstance(charset, tuple):
888 # RFC 2231 encoded, so decode it, and it better end up as ascii.
889 pcharset = charset[0] or 'us-ascii'
890 try:
891 # LookupError will be raised if the charset isn't known to
892 # Python. UnicodeError will be raised if the encoded text
893 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000894 as_bytes = charset[2].encode('raw-unicode-escape')
895 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896 except (LookupError, UnicodeError):
897 charset = charset[2]
898 # charset characters must be in us-ascii range
899 try:
900 charset.encode('us-ascii')
901 except UnicodeError:
902 return failobj
903 # RFC 2046, $4.1.2 says charsets are not case sensitive
904 return charset.lower()
905
906 def get_charsets(self, failobj=None):
907 """Return a list containing the charset(s) used in this message.
908
909 The returned list of items describes the Content-Type headers'
910 charset parameter for this message and all the subparts in its
911 payload.
912
913 Each item will either be a string (the value of the charset parameter
914 in the Content-Type header of that part) or the value of the
915 'failobj' parameter (defaults to None), if the part does not have a
916 main MIME type of "text", or the charset is not defined.
917
918 The list will contain one string for each part of the message, plus
919 one for the container message (i.e. self), so that a non-multipart
920 message will still return a list of length 1.
921 """
922 return [part.get_content_charset(failobj) for part in self.walk()]
923
924 # I.e. def walk(self): ...
925 from email.iterators import walk
R David Murray3da240f2013-10-16 22:48:40 -0400926
927
class MIMEPart(Message):
    """Message subclass with body/attachment lookup and multipart helpers.

    Unless a policy is passed in, instances use email.policy.default.
    """

    def __init__(self, policy=None):
        """Create the part; policy defaults to email.policy.default."""
        if policy is None:
            # Imported at call time, not at module import time.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True if the Content-Disposition of this part is 'attachment'.

        Only the disposition value itself is examined: parameters that
        follow it (e.g. 'attachment; filename="x"') are ignored and the
        comparison is case-insensitive.  False is returned when the part has
        no Content-Disposition header.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        # Compare only the text before the first ';' so that a header with
        # parameters is still recognized as an attachment.
        disposition = str(c_d).partition(';')[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) candidates for the body, depth first.

        priority is the index in preferencelist of the matching entry (a
        text subtype, or 'related').  Attachments are never candidates.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            # Any other multipart: every subpart is searched in turn.
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # For multipart/related only the root part is searched: the subpart
        # whose Content-ID equals the 'start' parameter, else the first one.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred entry.
                    break
        return body

    # Content types of which the first non-attachment occurrence is treated
    # as body material, not as an attachment, by iter_attachments().
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.

        The message's payload is not modified.  An empty iterator is also
        returned if the payload is not a list of parts.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # Typed as multipart but holding no list of parts (e.g. None or
            # a string): there is nothing to iterate over.
            return
        # Work on a copy so that the message's own payload is left intact.
        parts = list(payload)
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and for a message
        typed as multipart whose payload is not a list of parts.
        """
        if self.get_content_maintype() == 'multipart':
            payload = self.get_payload()
            if isinstance(payload, list):
                yield from payload

    def get_content(self, *args, content_manager=None, **kw):
        """Return the result of content_manager.get_content() for this part.

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content() on this part.

        content_manager defaults to the policy's content_manager; all other
        arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing Content-* headers and the existing payload are moved into a
        new first subpart; all other headers stay on this part.  ValueError
        is raised if this part is already a multipart whose subtype is
        either the requested one or listed in disallowed_subtypes.  When
        boundary is not None it is set as the boundary parameter.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert this part into a multipart/related container."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert this part into a multipart/alternative container."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert this part into a multipart/mixed container."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> first.

        The new part is created with this part's class and policy and
        filled via set_content(*args, **kw).  If _disp is given and the new
        part has no Content-Disposition header yet, _disp is used for it.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a multipart/related subpart with an 'inline' disposition."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a multipart/alternative subpart."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a multipart/mixed subpart with an 'attachment' disposition."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts Content-."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1123
1124
class EmailMessage(MIMEPart):
    """MIMEPart that also carries a MIME-Version header once content is set."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then ensure MIME-Version exists.

        A 'MIME-Version: 1.0' header is added only if the message does not
        already have a MIME-Version header.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'