blob: b4bc8cbc9e54ee56fd8b6c377d00f861bc5742c9 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011from io import BytesIO, StringIO
12
13# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014from email import utils
15from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040016from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000017from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040018from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000019Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21SEMISPACE = '; '
22
Guido van Rossum8b3febe2007-08-30 01:15:14 +000023# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000024# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
R. David Murray96fd54e2010-10-08 15:55:28 +000027
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000028def _splitparam(param):
29 # Split header parameters. BAW: this may be too simple. It isn't
30 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murraya2150232011-03-16 21:11:23 -040031 # found in the wild. We may eventually need a full fledged parser.
32 # RDM: we might have a Header here; for now just stringify it.
33 a, sep, b = str(param).partition(';')
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000034 if not sep:
35 return a.strip(), None
36 return a.strip(), b.strip()
37
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038def _formatparam(param, value=None, quote=True):
39 """Convenience function to format and return a key=value pair.
40
R. David Murray7ec754b2010-12-13 23:51:19 +000041 This will quote the value if needed or if quote is true. If value is a
42 three tuple (charset, language, value), it will be encoded according
43 to RFC2231 rules. If it contains non-ascii characters it will likewise
44 be encoded according to RFC2231 rules, using the utf-8 charset and
45 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000046 """
47 if value is not None and len(value) > 0:
48 # A tuple is used for RFC 2231 encoded parameter values where items
49 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000050 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000051 if isinstance(value, tuple):
52 # Encode as per RFC 2231
53 param += '*'
54 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000055 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000056 else:
57 try:
58 value.encode('ascii')
59 except UnicodeEncodeError:
60 param += '*'
61 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000062 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000063 # BAW: Please check this. I think that if quote is set it should
64 # force quoting even if not necessary.
65 if quote or tspecials.search(value):
66 return '%s="%s"' % (param, utils.quote(value))
67 else:
68 return '%s=%s' % (param, value)
69 else:
70 return param
71
72def _parseparam(s):
R David Murraya2150232011-03-16 21:11:23 -040073 # RDM This might be a Header, so for now stringify it.
74 s = ';' + str(s)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000075 plist = []
76 while s[:1] == ';':
77 s = s[1:]
78 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000079 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000080 end = s.find(';', end + 1)
81 if end < 0:
82 end = len(s)
83 f = s[:end]
84 if '=' in f:
85 i = f.index('=')
86 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
87 plist.append(f.strip())
88 s = s[end:]
89 return plist
90
91
92def _unquotevalue(value):
93 # This is different than utils.collapse_rfc2231_value() because it doesn't
94 # try to convert the value to a unicode. Message.get_param() and
95 # Message.get_params() are both currently defined to return the tuple in
96 # the face of RFC 2231 parameters.
97 if isinstance(value, tuple):
98 return value[0], value[1], utils.unquote(value[2])
99 else:
100 return utils.unquote(value)
101
102
103
104class Message:
105 """Basic message object.
106
107 A message object is defined as something that has a bunch of RFC 2822
108 headers and a payload. It may optionally have an envelope header
109 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
110 multipart or a message/rfc822), then the payload is a list of Message
111 objects, otherwise it is a string.
112
113 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000114 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115 do in fact appear multiple times (e.g. Received) and for those headers,
116 you must use the explicit API to set or get all the headers. Not all of
117 the mapping methods are implemented.
118 """
R David Murrayc27e5222012-05-25 15:01:48 -0400119 def __init__(self, policy=compat32):
120 self.policy = policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000121 self._headers = []
122 self._unixfrom = None
123 self._payload = None
124 self._charset = None
125 # Defaults for multipart messages
126 self.preamble = self.epilogue = None
127 self.defects = []
128 # Default content type
129 self._default_type = 'text/plain'
130
131 def __str__(self):
132 """Return the entire formatted message as a string.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000133 """
134 return self.as_string()
135
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons, if maxheaderlen is
    not specified it defaults to 0, so you must override it explicitly
    if you want a different maxheaderlen.  'policy' is passed to the
    Generator instance used to serialize the message; if it is not
    specified the policy associated with the message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    # Imported here rather than at module level, as in the original code.
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
159
160 def __bytes__(self):
161 """Return the entire formatted message as a bytes object.
162 """
163 return self.as_bytes()
164
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; if not specified the policy associated with
    the message instance is used.
    """
    # Imported here rather than at module level, as in the original code.
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
179
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
183
184 #
185 # Unix From_ line
186 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ (envelope) line."""
    self._unixfrom = unixfrom
189
def get_unixfrom(self):
    """Return the stored Unix From_ (envelope) line, as last set."""
    return self._unixfrom
192
193 #
194 # Payload manipulation.
195 #
def attach(self, payload):
    """Add the given payload to the current payload.

    The current payload will always be a list of objects after this method
    is called.  If you want to set the payload to a scalar object, use
    set_payload() instead.

    Raises TypeError if the message already has a payload that cannot be
    appended to (a non-multipart payload).
    """
    if self._payload is None:
        # First attachment: start the list.
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000211
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; asking for an index on a
    message whose payload is not a list raises TypeError.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload is decoded
    to bytes if this header's value is `quoted-printable', `base64' or one
    of `x-uuencode', `uuencode', `uue', `x-uue'.  Defects found while
    decoding base64 are passed to the policy's handle_defect().  If some
    other encoding is used, or the header is missing, or uuencoded data
    cannot be decoded, a string payload is returned converted to bytes
    without further decoding.  A payload that is neither a string nor
    bytes (for example None, when no payload has been set) is returned
    unchanged.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    if not decode:
        if isinstance(payload, str) and utils._has_surrogates(payload):
            # Surrogate escapes stand for raw 8-bit bytes: recover the
            # bytes and decode them with the declared charset.
            try:
                bpayload = payload.encode('ascii', 'surrogateescape')
            except UnicodeEncodeError:
                # The string mixes surrogate escapes with real non-ASCII
                # text, so it cannot be turned back into bytes; hand it
                # back untouched instead of failing.
                return payload
            try:
                payload = bpayload.decode(
                    self.get_param('charset', 'ascii'), 'replace')
            except LookupError:
                payload = bpayload.decode('ascii', 'replace')
        return payload
    # decode is true from here on; payload may be str or bytes.
    if isinstance(payload, str):
        try:
            bpayload = payload.encode('ascii', 'surrogateescape')
        except UnicodeEncodeError:
            # This won't happen for RFC compliant messages (messages
            # containing only ASCII codepoints in the unicode input).
            # If it does happen, turn the string into bytes in a way
            # guaranteed not to fail.
            bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, bytes):
        bpayload = payload
    else:
        # Nothing that can be decoded.  Previously this path reached the
        # decoders with bpayload unbound and raised UnboundLocalError.
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    return bpayload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000301
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A payload that has an encode() method is stored unchanged when no
    charset is given; with a charset it is first encoded using the
    charset's output_charset.  A payload that has a decode() method is
    stored decoded as ASCII with the surrogateescape error handler.  Any
    other payload is stored as-is.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        # Keep non-ASCII bytes as surrogate escapes inside a str.
        self._payload = payload.decode('ascii', 'surrogateescape')
    else:
        self._payload = payload
    if charset is not None:
        self.set_charset(charset)
321
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    # An existing Content-Transfer-Encoding header is left alone.
    if 'Content-Transfer-Encoding' in self:
        return
    body_encoding = charset.get_body_encoding()
    try:
        # First try the body encoding as a callable taking the message.
        body_encoding(self)
    except TypeError:
        # The call failed, so use the value as the header text and encode
        # the payload here.  The 'if' below is for backward compatibility,
        # it allows unicode through even though that won't work correctly
        # if the message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', body_encoding)
368
def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    This is whatever set_charset() last stored, or None.
    """
    current = self._charset
    return current
373
374 #
375 # MAPPING INTERFACE (partial)
376 #
377 def __len__(self):
378 """Return the total number of headers, including duplicates."""
379 return len(self._headers)
380
381 def __getitem__(self, name):
382 """Get a header value.
383
384 Return None if the header is missing instead of raising an exception.
385
386 Note that if the header appeared multiple times, exactly which
R. David Murrayd2c310f2010-10-01 02:08:02 +0000387 occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000388 the values matching a header field name.
389 """
390 return self.get(name)
391
392 def __setitem__(self, name, val):
393 """Set the value of a header.
394
395 Note: this does not overwrite an existing header with the same field
396 name. Use __delitem__() first to delete any existing headers.
397 """
R David Murrayabfc3742012-05-29 09:14:44 -0400398 max_count = self.policy.header_max_count(name)
399 if max_count:
400 lname = name.lower()
401 found = 0
402 for k, v in self._headers:
403 if k.lower() == lname:
404 found += 1
405 if found >= max_count:
406 raise ValueError("There may be at most {} {} headers "
407 "in a message".format(max_count, name))
R David Murrayc27e5222012-05-25 15:01:48 -0400408 self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000409
410 def __delitem__(self, name):
411 """Delete all occurrences of a header, if present.
412
413 Does not raise an exception if the header is missing.
414 """
415 name = name.lower()
416 newheaders = []
417 for k, v in self._headers:
418 if k.lower() != name:
419 newheaders.append((k, v))
420 self._headers = newheaders
421
422 def __contains__(self, name):
423 return name.lower() in [k.lower() for k, v in self._headers]
424
425 def __iter__(self):
426 for field, value in self._headers:
427 yield field
428
def keys(self):
    """Return a list of all the message's header field names.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.
    """
    return [name for name, _ in self._headers]
438
def values(self):
    """Return a list of all the message's header values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.  Each stored value is passed through the policy's
    header_fetch_parse() before being returned.
    """
    return [self.policy.header_fetch_parse(name, stored)
            for name, stored in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000449
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs.  These will be sorted in the
    order they appeared in the original message, or were added to the
    message, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.  Each stored value
    is passed through the policy's header_fetch_parse().
    """
    return [(name, self.policy.header_fetch_parse(name, stored))
            for name, stored in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000460
def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The name is matched case insensitively and the first
    matching header, passed through the policy's header_fetch_parse(), is
    returned.
    """
    wanted = name.lower()
    for key, stored in self._headers:
        if key.lower() == wanted:
            return self.policy.header_fetch_parse(key, stored)
    return failobj
472
473 #
R David Murrayc27e5222012-05-25 15:01:48 -0400474 # "Internal" methods (public API, but only intended for use by a parser
475 # or generator, not normal application code.
476 #
477
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is; the policy is not
    consulted.  This is an "internal" API, intended only for use by a
    parser.
    """
    entry = (name, value)
    self._headers.append(entry)
484
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    An iterator over a copy of the header list is returned, so headers
    added or removed afterwards do not affect it.  This is an "internal"
    API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
491
492 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000493 # Additional useful stuff
494 #
495
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    These will be sorted in the order they appeared in the original
    message, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.  The name is
    matched case insensitively and each value is passed through the
    policy's header_fetch_parse().

    If no such fields exist, failobj is returned (defaults to None).
    """
    name = name.lower()
    values = [self.policy.header_fetch_parse(k, v)
              for k, v in self._headers if k.lower() == name]
    if not values:
        return failobj
    return values
513
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its primary value (left
    out of the header text when it is None).  Keyword arguments can be used
    to set additional parameters for the header field, with underscores
    converted to dashes.  Normally the parameter will be added as
    key="value" unless value is None, in which case only the key will be
    added.  If a parameter value contains non-ASCII characters it can be
    specified as a three-tuple of (charset, language, value), in which case
    it will be encoded according to RFC2231 rules.  Otherwise it will be
    encoded using the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The primary value, when given, always comes first.
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        pname = key.replace('_', '-')
        parts.append(pname if val is None else _formatparam(pname, val))
    self[_name] = SEMISPACE.join(parts)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000543
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  The name is matched case insensitively and
    the new value is stored through the policy's header_store_parse().
    If no matching header was found, a KeyError (carrying the lower-cased
    name) is raised.
    """
    _name = _name.lower()
    for i, (k, v) in enumerate(self._headers):
        if k.lower() == _name:
            # Reuse the stored name k so the original case is preserved.
            self._headers[i] = self.policy.header_store_parse(k, _value)
            break
    else:
        raise KeyError(_name)
558
559 #
560 # Use these three methods instead of the three above.
561 #
562
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    # A private sentinel distinguishes "no header" from any real value.
    absent = object()
    header = self.get('content-type', absent)
    if header is absent:
        # This should have no parameters
        return self.get_default_type()
    content_type = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return content_type if content_type.count('/') == 1 else 'text/plain'
586
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. the text before its first '/'.
    """
    maintype, _, _ = self.get_content_type().partition('/')
    return maintype
595
def get_content_subtype(self):
    """Returns the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type(), i.e. the second item when that string is split
    on '/'.
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
604
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
613
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    # Stored verbatim; no validation is performed.
    self._default_type = ctype
622
623 def _get_params_preserve(self, failobj, header):
624 # Like get_params() but preserves the quoting of values. BAW:
625 # should this be part of the public interface?
626 missing = object()
627 value = self.get(header, missing)
628 if value is missing:
629 return failobj
630 params = []
R David Murraya2150232011-03-16 21:11:23 -0400631 for p in _parseparam(value):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000632 try:
633 name, val = p.split('=', 1)
634 name = name.strip()
635 val = val.strip()
636 except ValueError:
637 # Must have been a bare attribute
638 name = p.strip()
639 val = ''
640 params.append((name, val))
641 params = utils.decode_params(params)
642 return params
643
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    absent = object()
    pairs = self._get_params_preserve(absent, header)
    if pairs is absent:
        return failobj
    if not unquote:
        return pairs
    return [(name, _unquotevalue(val)) for name, val in pairs]
665
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != param.lower():
            continue
        # First match wins.
        return _unquotevalue(val) if unquote else val
    return failobj
699
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When the resulting header text differs from the current one, the
    header is rewritten: with replace true it is updated through
    replace_header(); otherwise it is deleted and then set again.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and header not in self:
        current = 'text/plain'
    else:
        current = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already there: rebuild the header text piece by
        # piece, substituting the new value where the name matches.
        rebuilt = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    else:
        # No usable existing value: tack the parameter on at the end.
        formatted = _formatparam(param, value, requote)
        rebuilt = SEMISPACE.join([current, formatted]) if current \
            else formatted

    if rebuilt != self.get(header):
        if replace:
            self.replace_header(header, rebuilt)
        else:
            del self[header]
            self[header] = rebuilt
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000750
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header text is rebuilt without the parameter or its value; when
    the result differs from the current text the header is deleted and set
    again.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens if the header is missing.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            # This is the parameter being removed.
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
772
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the "maintype/subtype" value of the Content-Type header.

    type has to contain exactly one '/' character; anything else raises
    ValueError.

    Existing parameters of the header are carried over to the new value.
    With requote left True they are re-quoted while being copied; with
    requote False their current quoting is kept.

    The header argument selects a header other than Content-Type.  When
    Content-Type itself is being set, the MIME-Version header is reset to
    "1.0" as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Touching Content-Type always (re)creates MIME-Version.
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The first entry is the previous type itself, not a parameter.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
804
def get_filename(self, failobj=None):
    """Return the file name attached to the payload, or failobj.

    The `filename' parameter of the Content-Disposition header is tried
    first; if it is absent, the `name' parameter of the Content-Type
    header is used instead.  The value found is collapsed from its
    RFC 2231 form if necessary and stripped of surrounding whitespace.
    """
    sentinel = object()
    lookups = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for param, header in lookups:
        found = self.get_param(param, sentinel, header)
        if found is not sentinel:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj
820
def get_boundary(self, failobj=None):
    """Return the unquoted `boundary' parameter of Content-Type.

    failobj is returned when the parameter cannot be found.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(found).rstrip()
    return failobj
833
def set_boundary(self, boundary):
    """Store boundary as the `boundary' parameter of Content-Type.

    Unlike removing the header and adding a fresh one, this rewrites the
    Content-Type header where it already sits, so the order of headers in
    the message does not change.  An existing boundary parameter is
    replaced in place; otherwise one is appended after the other
    parameters.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # Without a Content-Type header there is no type to attach the
        # boundary to, so refuse instead of inventing one.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for key, val in params:
        if key.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((key, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Rebuild the header list, swapping in the new Content-Type value
    # wherever a Content-Type header occurs.
    rebuilt = []
    for name, value in self._headers:
        if name.lower() != 'content-type':
            rebuilt.append((name, value))
            continue
        # A parameter with an empty value is written as its bare name.
        parts = [k if v == '' else '%s=%s' % (k, v) for k, v in updated]
        rebuilt.append(
            self.policy.header_store_parse(name, SEMISPACE.join(parts)))
    self._headers = rebuilt
879
def get_content_charset(self, failobj=None):
    """Return the lower-cased `charset' parameter of Content-Type.

    failobj is returned when there is no Content-Type header, when the
    header carries no charset parameter, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = value[0] or 'us-ascii'
        try:
            # LookupError: the codec is unknown to Python.
            # UnicodeError: the text has a character the codec lacks.
            value = str(value[2].encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            # Fall back to the undecoded text of the parameter.
            value = value[2]
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
909
def get_charsets(self, failobj=None):
    """Return the charset of this message and of every part below it.

    The message is traversed with walk(); for each part visited the
    result of its get_content_charset(failobj) call is collected.  An
    entry is therefore either the part's charset name or failobj when
    the part declares no usable charset.

    The container message itself is included, so the list for a
    non-multipart message has exactly one element.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
927
928 # I.e. def walk(self): ...
929 from email.iterators import walk
R David Murray3da240f2013-10-16 22:48:40 -0400930
931
class MIMEPart(Message):
    """Message subclass that adds the content-manager based MIME helpers.

    On top of Message it can locate the part best suited for display as
    the body, enumerate attachments and immediate subparts, get and set
    content through a content manager, and turn itself into (or extend
    itself as) a multipart/related, /alternative or /mixed container.
    """

    def __init__(self, policy=None):
        """Create an empty part, using email.policy.default if policy is None."""
        if policy is None:
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True if the Content-Disposition type is 'attachment'.

        Only the disposition type, i.e. the text before the first ';', is
        compared (case-insensitively), so a header that also carries
        parameters such as a filename is still recognised.  False when
        the header is missing.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        # The stored value may be a header object rather than a plain
        # string, so stringify it before splitting off the parameters.
        disposition = str(c_d).partition(';')[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates under part.

        priority is the index of the matching entry in preferencelist.
        Attachments are ignored, multiparts other than related are
        searched recursively, and for multipart/related the part itself
        is offered (if 'related' is wanted) before descending into its
        root part.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # The root of a multipart/related is the subpart whose Content-ID
        # equals the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                # Nothing can beat the first preference; stop searching.
                if prio == 0:
                    break
        return body

    # (maintype, subtype) pairs that iter_attachments treats as body
    # candidates rather than attachments.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, or when the payload is not a list of parts.

        The message's payload is never modified by iterating.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # A message can claim to be multipart and still carry a
            # non-list payload; there are no subparts to report then.
            return
        # Work on a copy: get_payload() hands out the live list, and
        # dropping the root from it would delete it from the message.
        parts = list(payload)
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and for a multipart
        whose payload is not a list of parts.
        """
        if self.get_content_maintype() == 'multipart':
            payload = self.get_payload()
            if isinstance(payload, list):
                yield from payload

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content as produced by the content manager.

        content_manager defaults to the policy's content manager; all other
        arguments are passed through to its get_content method.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content through the content manager.

        content_manager defaults to the policy's content manager; all other
        arguments are passed through to its set_content method.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Convert this message into a multipart/<subtype> container.

        Existing Content-* headers and the payload move into a new first
        subpart; the other headers stay on the container.  ValueError is
        raised if the message is already a multipart of the requested
        subtype or of one of disallowed_subtypes.  The boundary parameter
        is set only when boundary is not None.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> first.

        The new part is created with set_content(*args, **kw).  If _disp is
        given and the part has no Content-Disposition header yet, that
        header is set to _disp.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a part with inline disposition to a multipart/related."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a part to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a part with attachment disposition to a multipart/mixed."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every Content-* header."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1127
1128
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content also guarantees a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content, then add 'MIME-Version: 1.0' if it is missing."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'