blob: 63b51f6007d16478cf3be51f9573544319bb28d0 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from io import BytesIO, StringIO
14
15# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from email import utils
17from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040018from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000019from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040020from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000021Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
23SEMISPACE = '; '
24
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000026# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000027tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029
def _splitparam(param):
    """Split a header value at its first semicolon.

    param is stringified first because it may be a Header instance rather
    than a str.  Returns a 2-tuple (value, parameters), both stripped of
    surrounding whitespace; parameters is None when there is no semicolon.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.
    """
    head, found, tail = str(param).partition(';')
    rest = tail.strip() if found else None
    return head.strip(), rest
39
def _formatparam(param, value=None, quote=True):
    """Format a header parameter as a `key=value' string and return it.

    When value is None or empty, param is returned unchanged.  A three tuple
    (charset, language, value) is encoded according to RFC 2231 rules; a
    string containing non-ascii characters is likewise RFC 2231 encoded,
    using the utf-8 charset and an empty language.  RFC 2231 encoded values
    are never quoted.  Any other value is quoted when quote is true or when
    it contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple carries (charset, language, value); charset is a string, not a
    # Charset instance.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
73
def _parseparam(s):
    """Split a header value into its semicolon separated pieces.

    s is stringified first because it might be a Header.  A semicolon that
    sits inside a double-quoted string (an odd number of unescaped quotes
    precede it) does not end a piece.  For pieces containing `=', the key
    is lower-cased and whitespace around key and value is removed.  Returns
    the list of pieces.
    """
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Advance past semicolons that fall inside a quoted string.
        while cut > 0:
            open_quotes = (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut))
            if open_quotes % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces
92
93
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters, so only the third item is unquoted.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return (value[0], value[1], utils.unquote(value[2]))
103
104
105
106class Message:
107 """Basic message object.
108
109 A message object is defined as something that has a bunch of RFC 2822
110 headers and a payload. It may optionally have an envelope header
111 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
112 multipart or a message/rfc822), then the payload is a list of Message
113 objects, otherwise it is a string.
114
115 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000116 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000117 do in fact appear multiple times (e.g. Received) and for those headers,
118 you must use the explicit API to set or get all the headers. Not all of
119 the mapping methods are implemented.
120 """
def __init__(self, policy=compat32):
    """Create an empty message governed by policy (default compat32).

    The new message has no headers, payload, Unix-From line or charset, an
    empty defects list, and a default content type of text/plain.
    """
    self.policy = policy
    # (name, value) header pairs, in insertion order.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
132
def __str__(self):
    """Return the formatted message as a string.

    The result is whatever as_string() produces with its default arguments.
    """
    text = self.as_string()
    return text
138
def as_string(self, unixfrom=False, maxheaderlen=0):
    """Flatten the message with a Generator and return the text.

    Optional `unixfrom', when True, means include the Unix From_ envelope
    header.  maxheaderlen is handed to the Generator unchanged.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    buffer = StringIO()
    writer = Generator(buffer, mangle_from_=False, maxheaderlen=maxheaderlen)
    writer.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
153
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has subparts."""
    payload = self._payload
    return isinstance(payload, list)
157
158 #
159 # Unix From_ line
160 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's envelope (Unix From_) header."""
    self._unixfrom = unixfrom
163
def get_unixfrom(self):
    """Return the stored envelope (Unix From_) header, or None if unset."""
    envelope = self._unixfrom
    return envelope
166
167 #
168 # Payload manipulation.
169 #
def attach(self, payload):
    """Append payload to the message's list of subparts.

    When there is no payload yet, a new one-element list is created;
    otherwise payload is appended to the existing payload, which is
    therefore expected to be a list already.  To set the payload to a
    scalar object, use set_payload() instead.
    """
    existing = self._payload
    if existing is not None:
        existing.append(payload)
        return
    self._payload = [payload]
181
def get_payload(self, i=None, decode=False):
    """Return the payload, optionally indexed or transfer-decoded.

    For a multipart message the payload is a list: it is returned as-is
    (mutating it mutates the message), or element i when i is given; with
    decode true, None is returned instead.

    For a non-multipart message, giving i raises TypeError.  Without decode
    the payload is returned; a str payload containing surrogate escapes is
    first re-decoded using the message's charset parameter (falling back to
    ascii), replacing undecodable bytes.  With decode true the payload is
    converted to bytes and decoded according to the
    Content-Transfer-Encoding header when that is `quoted-printable',
    `base64' or one of the uuencode spellings.  For any other encoding, a
    missing header, or undecodable uuencoded data, the bytes are returned
    undecoded.
    """
    # Logic table, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        return self._payload if i is None else self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if not (i is None or isinstance(self._payload, list)):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # The header value might be a Header, so for now stringify it.
    encoding = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here; in that case ``raw`` is never bound.
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            raw = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = raw.decode(self.get_param('charset', 'ascii'),
                                         'replace')
                except LookupError:
                    payload = raw.decode('ascii', 'replace')
        elif decode:
            try:
                raw = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII codepoints in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                raw = payload.encode('raw-unicode-escape')
    if not decode:
        return payload
    if encoding == 'quoted-printable':
        return utils._qdecode(raw)
    if encoding == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(raw.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    if encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        source = BytesIO(raw)
        sink = BytesIO()
        try:
            uu.decode(source, sink, quiet=True)
            return sink.getvalue()
        except uu.Error:
            # Some decoding problem
            return raw
    if isinstance(payload, str):
        return raw
    return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000271
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    A payload with an `encode' method (text) and no charset is stored
    unchanged.  With a charset, text is encoded to the charset's output
    charset.  Anything that then has a `decode' method (bytes) is stored as
    a str, decoded as ascii with surrogateescape; any other object is stored
    as-is.

    Optional charset sets the message's default character set; when given
    it is finally passed to set_charset().  See set_charset() for details.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        # We should check for ASCII-only here, but we can't do that
        # for backward compatibility reasons.  Fixed in 3.4.
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    self._payload = (payload.decode('ascii', 'surrogateescape')
                     if hasattr(payload, 'decode') else payload)
    if charset is not None:
        self.set_charset(charset)
293
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  A string is converted to a Charset instance.  None removes the
    charset parameter from the Content-Type field and clears the stored
    charset.

    Otherwise MIME-Version and Content-Type headers are added when missing
    (an existing Content-Type just gets its charset parameter set), the
    payload is body-encoded when the charset differs from its output
    charset, and, unless the header is already present, a
    Content-Transfer-Encoding is applied as given by the charset's body
    encoding.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # A callable body encoding encodes the message in place.
        encoder(self)
    except TypeError:
        # Not callable: the body encoding is the header value itself.
        # This if is for backward compatibility and will be removed
        # in 3.4 when the ascii check is added to set_payload.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoder)
339
def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    This is the value stored by set_charset(); it may be None.
    """
    current = self._charset
    return current
344
345 #
346 # MAPPING INTERFACE (partial)
347 #
def __len__(self):
    """Return the number of stored headers, counting duplicates."""
    headers = self._headers
    return len(headers)
351
def __getitem__(self, name):
    """Get a header value via get(), using get()'s default failobj.

    A missing header therefore yields None instead of raising an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
362
def __setitem__(self, name, val):
    """Append a header to the message.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    When the policy reports a maximum count for the header name and that
    many headers of that name (compared case insensitively) are already
    present, ValueError is raised.  The stored pair is whatever the
    policy's header_store_parse() returns.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        seen = 0
        for key, _ in self._headers:
            if key.lower() != wanted:
                continue
            seen += 1
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000380
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Names are compared case insensitively.  Does not raise an exception if
    the header is missing.  The header list is replaced by a new list.
    """
    target = name.lower()
    self._headers = [(key, value) for key, value in self._headers
                     if key.lower() != target]
392
def __contains__(self, name):
    """Return True if a header called name exists (case insensitive)."""
    wanted = name.lower()
    return any(key.lower() == wanted for key, _ in self._headers)
395
def __iter__(self):
    """Yield each header field name, in stored order, with duplicates."""
    yield from (field for field, _ in self._headers)
399
def keys(self):
    """Return a list of all the message's header field names.

    The names come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    names = []
    for name, _ in self._headers:
        names.append(name)
    return names
409
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    The values come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    parsed = []
    for name, raw in self._headers:
        parsed.append(fetch(name, raw))
    return parsed
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000420
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs where each value has been passed
    through the policy's header_fetch_parse().  The pairs come back in the
    order the headers are stored, which is the order they appeared in the
    original message or were added to it, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    pairs = []
    for name, raw in self._headers:
        pairs.append((name, fetch(name, raw)))
    return pairs
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000431
def get(self, name, failobj=None):
    """Get a header value.

    Returns the first stored header whose name matches name case
    insensitively, after passing it through the policy's
    header_fetch_parse().  failobj is returned when the field is missing.
    """
    wanted = name.lower()
    for key, raw in self._headers:
        if key.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(key, raw)
    return failobj
443
444 #
R David Murrayc27e5222012-05-25 15:01:48 -0400445 # "Internal" methods (public API, but only intended for use by a parser
446 # or generator, not normal application code.
447 #
448
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is, bypassing the policy.
    This is an "internal" API, intended only for use by a parser.
    """
    pair = (name, value)
    self._headers.append(pair)
455
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The result is an iterator over a copy of the header list, so headers
    added or removed afterwards do not affect it.
    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
462
463 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000464 # Additional useful stuff
465 #
466
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case insensitively and each value is passed through
    the policy's header_fetch_parse().  The values come back in the order
    the headers are stored and may contain duplicates.  Any fields deleted
    and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    found = [self.policy.header_fetch_parse(key, raw)
             for key, raw in self._headers
             if key.lower() == wanted]
    return found if found else failobj
484
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add.  _value is the primary value of the
    header; when it is None the header consists of the parameters only.
    Keyword arguments can be used to set additional parameters for the
    header field, with underscores converted to dashes.  Normally the
    parameter will be added as key="value" unless value is None, in which
    case only the key will be added.  If a parameter value contains
    non-ASCII characters it can be specified as a three-tuple of (charset,
    language, value), in which case it will be encoded according to RFC2231
    rules.  Otherwise it will be encoded using the utf-8 charset and a
    language of ''.

    The header is added through __setitem__(), so an existing header with
    the same name is not replaced.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The primary value, when present, always comes first.
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        parts.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(parts)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000514
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case: the stored field name is kept and only the
    value changes, via the policy's header_store_parse().  If no matching
    header was found, a KeyError carrying the lower-cased name is raised.
    """
    wanted = _name.lower()
    for index, (key, _old) in enumerate(self._headers):
        if key.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(key, _value)
            return
    raise KeyError(wanted)
529
530 #
531 # Use these three methods instead of the three above.
532 #
533
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype', with any parameters removed.  If there was no
    Content-Type header in the message, the default type as given by
    get_default_type() is returned.  If the header's type does not contain
    exactly one slash, `text/plain' is returned.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return main if main.count('/') == 1 else 'text/plain'
557
def get_content_maintype(self):
    """Return the message's main content type.

    This is the part before the slash in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
566
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the part after the slash in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
575
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.  The value
    returned is the one stored by set_default_type().
    """
    default = self._default_type
    return default
584
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is only stored on the message object; it is
    not written to the Content-Type header.
    """
    self._default_type = ctype
593
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header, preserving the quoting of values.

    Like get_params() but without unquoting.  failobj is returned when the
    header is missing.  Otherwise the header is split with _parseparam(),
    each piece becomes a (name, value) pair (a bare attribute gets the
    empty string as value), and the list is passed through
    utils.decode_params().
    BAW: should this be part of the public interface?
    """
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for chunk in _parseparam(raw):
        key, equals, val = chunk.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((chunk.strip(), ''))
    return utils.decode_params(pairs)
614
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    pairs = self._get_params_preserve(sentinel, header)
    if pairs is sentinel:
        return failobj
    if not unquote:
        return pairs
    return [(key, _unquotevalue(val)) for key, val in pairs]
636
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
670
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    if self.get_param(param, header=header):
        # The parameter exists: rebuild the header, swapping in the new
        # value where the name matches.
        rebuilt = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
        ctype = rebuilt
    else:
        # The parameter is new: append it to the current header value.
        addition = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([ctype, addition]) if ctype else addition

    # Only rewrite the header when its value actually changed.
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
718
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt from its remaining parameters, without the
    parameter or its value, and replaced when the result differs from the
    current value.  All values will be quoted as necessary unless requote
    is False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens when the header is missing.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
740
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    carried = []
    if header in self:
        # Skip the first param; it's the old type.
        carried = self.get_params(header=header, unquote=requote)[1:]
        del self[header]
    self[header] = type
    for key, val in carried:
        self.set_param(key, val, header, requote)
772
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter of the Content-Type header.  failobj is returned when
    neither is found.  Surrounding whitespace is stripped from the result.
    """
    sentinel = object()
    lookups = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for param, header in lookups:
        found = self.get_param(param, sentinel, header)
        if found is not sentinel:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj
788
def get_boundary(self, failobj=None):
    """Return the multipart boundary string, or failobj if there is none.

    The value comes from the 'boundary' parameter of the Content-Type
    header and is returned unquoted.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
801
def set_boundary(self, boundary):
    """Point the Content-Type header's boundary parameter at 'boundary'.

    Unlike deleting the Content-Type header and adding a fresh one with
    add_header(), this rewrites the header where it already sits, so the
    order of the message's headers is left as it was.  An existing
    boundary parameter is replaced in position; if there is none, a new
    one is appended after the other parameters.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # Without a Content-Type header there is no way to know which
        # type to create, so give up.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = [
        ('boundary', quoted) if key.lower() == 'boundary' else (key, val)
        for key, val in old_params
    ]
    if not any(key.lower() == 'boundary' for key, _ in old_params):
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Valueless entries (such as the leading type) are emitted bare.
    rendered = SEMISPACE.join(
        key if val == '' else '%s=%s' % (key, val)
        for key, val in updated)
    # Rebuild the header list, swapping in the new Content-Type value.
    self._headers = [
        self.policy.header_store_parse(name, rendered)
        if name.lower() == 'content-type' else (name, value)
        for name, value in self._headers
    ]
847
def get_content_charset(self, failobj=None):
    """Return the lower-cased charset parameter of the Content-Type header.

    failobj is returned when the message has no Content-Type header, when
    that header carries no charset parameter, or when the charset name
    contains characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        encoding = value[0] or 'us-ascii'
        text = value[2]
        try:
            # LookupError means Python does not know the declared
            # encoding; UnicodeError means the text does not fit it.
            # Either way fall back to the undecoded text.
            value = text.encode('raw-unicode-escape').decode(encoding)
        except (LookupError, UnicodeError):
            value = text
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    else:
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
877
def get_charsets(self, failobj=None):
    """Return the charsets of this message and of every part below it.

    The message tree is traversed with walk(), and one entry is produced
    per visited part by calling its get_content_charset(failobj).  Each
    entry is therefore either that part's charset string or failobj when
    the part yields no charset.

    Because walk() also visits the message itself, a non-multipart
    message still produces a list of length 1.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
895
896 # I.e. def walk(self): ...
897 from email.iterators import walk