blob: aa46debeba51ad8802b3a04eee400261e1eac8ef [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
R David Murray95a8dfb2014-03-23 14:18:44 -040011import quopri
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from io import BytesIO, StringIO
13
14# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015from email import utils
16from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040017from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000018from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040019from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000020Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021
22SEMISPACE = '; '
23
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000026tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
R. David Murray96fd54e2010-10-08 15:55:28 +000028
def _splitparam(param):
    """Split a header value at its first ';' into (value, parameters).

    The argument is stringified first because it might be a Header rather
    than a plain string.  Both returned items are stripped of surrounding
    whitespace; the second item is None when the value contains no ';'.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    text = str(param)
    if ';' not in text:
        return text.strip(), None
    head, tail = text.split(';', 1)
    return head.strip(), tail.strip()
38
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    When value is None or empty, the bare parameter name is returned.

    If value is a three tuple (charset, language, value), it is encoded
    according to RFC 2231 rules.  A string containing non-ASCII characters
    is likewise RFC 2231 encoded, using the utf-8 charset and an empty
    language.  RFC 2231 encoded values are never quoted, and the parameter
    name gets a trailing '*'.

    Otherwise the value is quoted when quote is true or when it contains
    one of the `tspecials' characters.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
72
def _parseparam(s):
    """Split a header value into a list of its ';'-separated pieces.

    A ';' that falls inside a double-quoted string (an odd number of
    unescaped '"' precede it) does not end a piece.  For pieces of the form
    key=value the key is lower-cased and whitespace around both key and
    value is removed; other pieces are simply stripped.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Skip separators that sit inside an open quoted string.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece = remaining[:cut]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        remaining = remaining[cut:]
    return pieces
91
92
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 3-tuples as tuples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
102
103
104
105class Message:
106 """Basic message object.
107
108 A message object is defined as something that has a bunch of RFC 2822
109 headers and a payload. It may optionally have an envelope header
110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
111 multipart or a message/rfc822), then the payload is a list of Message
112 objects, otherwise it is a string.
113
114 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000115 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000116 do in fact appear multiple times (e.g. Received) and for those headers,
117 you must use the explicit API to set or get all the headers. Not all of
118 the mapping methods are implemented.
119 """
def __init__(self, policy=compat32):
    """Create an empty message that uses the given policy.

    The new message has no headers, no Unix From_ line, no payload and no
    charset; its default content type is text/plain.
    """
    self.policy = policy
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
131
def __str__(self):
    """Return the entire formatted message as a string.

    Equivalent to calling as_string() with its default arguments.
    """
    text = self.as_string()
    return text
136
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons, if maxheaderlen is
    not specified it defaults to 0, so you must override it explicitly
    if you want a different maxheaderlen.  'policy' is passed to the
    Generator instance used to serialize the message; if it is not
    specified the policy associated with the message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
160
def __bytes__(self):
    """Return the entire formatted message as a bytes object.

    Equivalent to calling as_bytes() with its default arguments.
    """
    data = self.as_bytes()
    return data
165
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; if not specified the policy associated with
    the message instance is used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
180
def is_multipart(self):
    """Return True if the message consists of multiple parts.

    A message counts as multipart exactly when its payload is a list.
    """
    payload = self._payload
    return isinstance(payload, list)
184
185 #
186 # Unix From_ line
187 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
190
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
193
194 #
195 # Payload manipulation.
196 #
def attach(self, payload):
    """Add the given payload to the current payload.

    The current payload will always be a list of objects after this method
    is called.  If you want to set the payload to a scalar object, use
    set_payload() instead.

    Raises TypeError if the existing payload has no append() method, i.e.
    the message already carries a non-multipart payload.
    """
    if self._payload is None:
        # First attachment: start the list.
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000212
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode spellings.  If some other encoding is used, or the
    header is missing, or if the payload has bogus uuencoded data, the
    payload is returned as-is (as bytes when it was stored as a string).
    Defects found while decoding base64 are reported through the policy.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given and the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        else:
            return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here.
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                charset = self.get_param('charset', 'ascii')
                # An RFC 2231 encoded charset parameter comes back as a
                # (charset, language, value) tuple, which bytes.decode()
                # rejects with TypeError; collapse it to a string first.
                if isinstance(charset, tuple):
                    charset = utils.collapse_rfc2231_value(charset)
                try:
                    payload = bpayload.decode(charset, 'replace')
                except LookupError:
                    # Unknown charset name: fall back to ASCII.
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII codepoints in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    if not decode:
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000302
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A payload with an encode() method (a string) is stored unchanged when
    no charset is given; otherwise it is encoded to the charset's output
    charset.  A payload with a decode() method (bytes) is stored as a
    string decoded as ASCII with the surrogateescape error handler.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    self._payload = (payload.decode('ascii', 'surrogateescape')
                     if hasattr(payload, 'decode') else payload)
    if charset is not None:
        self.set_charset(charset)
322
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    output_charset = charset.get_output_charset()
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', output_charset)
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=output_charset)
    if charset != output_charset:
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    body_encoding = charset.get_body_encoding()
    try:
        # The body encoding may be a callable that encodes the message
        # and sets the header itself.
        body_encoding(self)
    except TypeError:
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        raw = self._payload
        if raw:
            try:
                raw = raw.encode('ascii', 'surrogateescape')
            except UnicodeError:
                raw = raw.encode(charset.output_charset)
        self._payload = charset.body_encode(raw)
        self.add_header('Content-Transfer-Encoding', body_encoding)
369
def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    This is whatever set_charset() last stored, or None.
    """
    return self._charset
374
375 #
376 # MAPPING INTERFACE (partial)
377 #
def __len__(self):
    """Return the total number of headers, including duplicates."""
    headers = self._headers
    return len(headers)
381
def __getitem__(self, name):
    """Get a header value.

    Return None if the header is missing instead of raising an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
392
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError if the policy limits how many headers of this name
    may appear and that limit has already been reached.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        already_present = sum(1 for field, _ in self._headers
                              if field.lower() == wanted)
        if already_present and already_present >= limit:
            raise ValueError("There may be at most {} {} headers "
                             "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000410
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Header names are compared case insensitively.  Does not raise an
    exception if the header is missing.
    """
    unwanted = name.lower()
    self._headers = [(field, value)
                     for field, value in self._headers
                     if field.lower() != unwanted]
422
def __contains__(self, name):
    """Return True if a header with this name (case insensitive) exists."""
    wanted = name.lower()
    return any(field.lower() == wanted for field, _ in self._headers)
425
def __iter__(self):
    """Yield each header field name in stored order, duplicates included."""
    yield from (field for field, _ in self._headers)
429
def keys(self):
    """Return a list of all the message's header field names.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.
    """
    names = []
    for field, _ in self._headers:
        names.append(field)
    return names
439
def values(self):
    """Return a list of all the message's header values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.  Each value is passed through the policy's header_fetch_parse().
    """
    fetched = []
    for field, raw in self._headers:
        fetched.append(self.policy.header_fetch_parse(field, raw))
    return fetched
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000450
def items(self):
    """Get all the message's header fields and values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.  Each value is passed through the policy's header_fetch_parse().
    """
    pairs = []
    for field, raw in self._headers:
        pairs.append((field, self.policy.header_fetch_parse(field, raw)))
    return pairs
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000461
def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The first header whose name matches case insensitively
    is returned, after passing through the policy's header_fetch_parse().
    """
    wanted = name.lower()
    for field, raw in self._headers:
        if field.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(field, raw)
    return failobj
473
474 #
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code).
477 #
478
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is, bypassing the policy.
    This is an "internal" API, intended only for use by a parser.
    """
    pair = (name, value)
    self._headers.append(pair)
485
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The result is an iterator over a copy of the header list, so headers
    added or removed afterwards do not affect it.
    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
492
493 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000494 # Additional useful stuff
495 #
496
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    These will be sorted in the order they appeared in the original
    message, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [self.policy.header_fetch_parse(field, raw)
               for field, raw in self._headers
               if field.lower() == wanted]
    return matches if matches else failobj
514
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    name is the header field to add.  keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.  If a
    parameter value contains non-ASCII characters it can be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.  Otherwise it will be encoded using
    the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The main value, when given, always comes first.
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        pieces.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000544
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  If no matching header was found, a KeyError is
    raised (carrying the lower-cased name).
    """
    _name = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == _name:
            self._headers[index] = self.policy.header_store_parse(field, _value)
            return
    raise KeyError(_name)
559
560 #
561 # Use these three methods instead of the three above.
562 #
563
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header_value = self.get('content-type', sentinel)
    if header_value is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_value, _params = _splitparam(header_value)
    ctype = main_value.lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
587
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
596
def get_content_subtype(self):
    """Returns the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
605
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default_type = self._default_type
    return default_type
614
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    # Stored verbatim; no validation is performed.
    self._default_type = ctype
623
def _get_params_preserve(self, failobj, header):
    """Return the header's parameters as (name, value) pairs, still quoted.

    Like get_params() but preserves the quoting of values.  failobj is
    returned when the header is missing.  A bare attribute (no '=') gets
    the empty string as its value before the list is handed to
    utils.decode_params().

    BAW: should this be part of the public interface?
    """
    sentinel = object()
    header_value = self.get(header, sentinel)
    if header_value is sentinel:
        return failobj
    pairs = []
    for piece in _parseparam(header_value):
        key, equals, val = piece.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((piece.strip(), ''))
    return utils.decode_params(pairs)
644
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    pairs = self._get_params_preserve(sentinel, header)
    if pairs is sentinel:
        return failobj
    if not unquote:
        return pairs
    return [(key, _unquotevalue(val)) for key, val in pairs]
666
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
700
701 def set_param(self, param, value, header='Content-Type', requote=True,
R David Murray3da240f2013-10-16 22:48:40 -0400702 charset=None, language='', replace=False):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000703 """Set a parameter in the Content-Type header.
704
705 If the parameter already exists in the header, its value will be
706 replaced with the new value.
707
708 If header is Content-Type and has not yet been defined for this
709 message, it will be set to "text/plain" and the new parameter and
710 value will be appended as per RFC 2045.
711
712 An alternate header can specified in the header argument, and all
713 parameters will be quoted as necessary unless requote is False.
714
715 If charset is specified, the parameter will be encoded according to RFC
716 2231. Optional language specifies the RFC 2231 language, defaulting
717 to the empty string. Both charset and language should be strings.
718 """
719 if not isinstance(value, tuple) and charset:
720 value = (charset, language, value)
721
722 if header not in self and header.lower() == 'content-type':
723 ctype = 'text/plain'
724 else:
725 ctype = self.get(header)
726 if not self.get_param(param, header=header):
727 if not ctype:
728 ctype = _formatparam(param, value, requote)
729 else:
730 ctype = SEMISPACE.join(
731 [ctype, _formatparam(param, value, requote)])
732 else:
733 ctype = ''
734 for old_param, old_value in self.get_params(header=header,
735 unquote=requote):
736 append_param = ''
737 if old_param.lower() == param.lower():
738 append_param = _formatparam(param, value, requote)
739 else:
740 append_param = _formatparam(old_param, old_value, requote)
741 if not ctype:
742 ctype = append_param
743 else:
744 ctype = SEMISPACE.join([ctype, append_param])
745 if ctype != self.get(header):
R David Murray3da240f2013-10-16 22:48:40 -0400746 if replace:
747 self.replace_header(header, ctype)
748 else:
749 del self[header]
750 self[header] = ctype
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000751
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header value is rebuilt without the parameter or its value; when
    the result differs from the current value the header is deleted and
    added again.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens if the header is missing.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
773
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype" (exactly one
    slash); otherwise a ValueError describing the offending value is
    raised and the message is left untouched.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError(
            "type must be of the form 'maintype/subtype', got {!r}".format(
                type))
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        # Nothing to preserve: just store the bare type.
        self[header] = type
        return
    # Capture the existing parameters before the header is replaced.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
805
def get_filename(self, failobj=None):
    """Return the payload's filename, or failobj if none is recorded.

    The `filename' parameter of the Content-Disposition header is tried
    first; if it is absent, the `name' parameter of the Content-Type
    header is used instead.  The value found is collapsed from any
    RFC 2231 form and stripped of surrounding whitespace.
    """
    sentinel = object()
    # Ordered by preference: the first lookup that yields a value wins.
    lookups = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for key, header in lookups:
        raw = self.get_param(key, sentinel, header)
        if raw is not sentinel:
            return utils.collapse_rfc2231_value(raw).strip()
    return failobj
821
def get_boundary(self, failobj=None):
    """Return the multipart boundary string, or failobj if there is none.

    The value comes from the `boundary' parameter of the Content-Type
    header, unquoted and collapsed from any RFC 2231 form.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is sentinel:
        return failobj
    collapsed = utils.collapse_rfc2231_value(raw)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()
834
835 def set_boundary(self, boundary):
836 """Set the boundary parameter in Content-Type to 'boundary'.
837
838 This is subtly different than deleting the Content-Type header and
839 adding a new one with a new boundary parameter via add_header(). The
840 main difference is that using the set_boundary() method preserves the
841 order of the Content-Type header in the original message.
842
843 HeaderParseError is raised if the message has no Content-Type header.
844 """
845 missing = object()
846 params = self._get_params_preserve(missing, 'content-type')
847 if params is missing:
848 # There was no Content-Type header, and we don't know what type
849 # to set it to, so raise an exception.
850 raise errors.HeaderParseError('No Content-Type header found')
851 newparams = []
852 foundp = False
853 for pk, pv in params:
854 if pk.lower() == 'boundary':
855 newparams.append(('boundary', '"%s"' % boundary))
856 foundp = True
857 else:
858 newparams.append((pk, pv))
859 if not foundp:
860 # The original Content-Type header had no boundary attribute.
861 # Tack one on the end. BAW: should we raise an exception
862 # instead???
863 newparams.append(('boundary', '"%s"' % boundary))
864 # Replace the existing Content-Type header with the new value
865 newheaders = []
866 for h, v in self._headers:
867 if h.lower() == 'content-type':
868 parts = []
869 for k, v in newparams:
870 if v == '':
871 parts.append(k)
872 else:
873 parts.append('%s=%s' % (k, v))
R David Murrayc27e5222012-05-25 15:01:48 -0400874 val = SEMISPACE.join(parts)
875 newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000876
877 else:
878 newheaders.append((h, v))
879 self._headers = newheaders
880
def get_content_charset(self, failobj=None):
    """Return the Content-Type header's charset parameter, lower-cased.

    failobj is returned when the parameter is absent (which includes the
    header itself being absent) or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        source_charset = value[0] or 'us-ascii'
        raw = value[2]
        try:
            # LookupError will be raised if the charset isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in the charset.
            value = raw.encode('raw-unicode-escape').decode(source_charset)
        except (LookupError, UnicodeError):
            # Fall back to the undecoded text.
            value = raw
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
910
def get_charsets(self, failobj=None):
    """Return the charset of every part of this message, as a list.

    The message is traversed with walk(), and each part visited
    contributes one entry: the result of its get_content_charset(),
    which is failobj (None by default) when that part yields no charset.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
928
929 # I.e. def walk(self): ...
930 from email.iterators import walk
R David Murray3da240f2013-10-16 22:48:40 -0400931
932
class MIMEPart(Message):
    """Message subclass adding body/attachment helpers for MIME parts.

    When no policy is given, email.policy.default is used.  The class
    adds body discovery (get_body), attachment and subpart iteration,
    content-manager based get/set of content, and helpers that turn a
    part into a multipart container or add subparts to one.
    """

    def __init__(self, policy=None):
        """Create the part, using email.policy.default if policy is None."""
        if policy is None:
            # Imported at call time rather than at module import time.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True if the Content-Disposition value is 'attachment'.

        Only the disposition value is compared (case-insensitively);
        parameters following it, such as filename, are ignored.  False is
        returned when there is no Content-Disposition header.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        # The header may carry parameters ("attachment; filename=..."),
        # so compare only the text before the first ';'.
        disposition = str(c_d).partition(';')[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) for each body candidate under part.

        The priority is the index in preferencelist of the matching entry
        (a text subtype, or 'related').  Attachments are never candidates.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            # Any other multipart: every subpart may hold a candidate.
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Look inside the related part's root: the subpart named by the
        # 'start' parameter if it can be found, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred entry.
                    break
        return body

    # Content types that iter_attachments treats as possible body parts.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, or when the payload is not a list of parts.

        The message's own payload list is never modified.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # Declared multipart but holding no list of subparts (for
            # example an empty or malformed message): nothing to yield.
            return
        # Work on a copy so that dropping the root below cannot remove
        # it from the message itself.
        parts = list(payload)
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.get_content_maintype() == 'multipart':
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content via a content manager.

        The policy's content_manager is used unless one is passed; all
        other arguments are forwarded to its get_content().
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content via a content manager.

        The policy's content_manager is used unless one is passed; all
        other arguments are forwarded to its set_content().
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        ValueError is raised if the part is already a multipart whose
        subtype is `subtype' itself or one of disallowed_subtypes.
        Existing Content-* headers and the current payload are moved into
        a new first subpart; all other headers stay on this part.  If
        boundary is not None it is set as the boundary parameter.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Create a new subpart from the arguments and attach it.

        The part is first converted with make_<_subtype>() unless it is
        already multipart/<_subtype>.  The new subpart has the same class
        and policy as self, and its content is set with
        set_content(*args, **kw).  If _disp is given and the subpart has
        no Content-Disposition header, one is added with that value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart to a multipart/related, defaulting to inline."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart to a multipart/mixed, defaulting to attachment."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every Content-* header."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1128
1129
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also guarantees a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then ensure MIME-Version.

        An existing MIME-Version header is left alone; otherwise one with
        the value '1.0' is added.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'