blob: f43a3809beca9a461d21fb1046be1154e14c2d59 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from io import BytesIO, StringIO
14
15# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from email import utils
17from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040018from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000019from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040020from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000021Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
23SEMISPACE = '; '
24
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025# Regular expression that matches `special' characters in parameters, the
Mark Dickinson934896d2009-02-21 20:59:32 +000026# existence of which force quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000027tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple ``(head, rest)`` of whitespace-stripped strings.
    ``rest`` is None when the value contains no semicolon at all.
    """
    # This is not strictly RFC 2045 (section 5.1) compliant, but it copes
    # with most headers found in the wild.  The argument may be a Header
    # instance rather than a str, so it is stringified first.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
39
def _formatparam(param, value=None, quote=True):
    """Format a header parameter as ``key=value`` and return it.

    A missing or empty value yields the bare parameter name.  A tuple value
    is taken to be (charset, language, value) and is encoded according to
    RFC 2231; a string containing non-ASCII characters is likewise RFC 2231
    encoded using the utf-8 charset and an empty language.  RFC 2231
    encoded values are never quoted.  Any other value is quoted when quote
    is true or when it contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # (charset, language, value); charset is a string, not a Charset
        # instance.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
73
def _parseparam(s):
    """Split a header value into its semicolon-separated pieces.

    Semicolons inside a double-quoted section do not split.  For a piece of
    the form ``key=value`` the key is lower-cased and whitespace around the
    key and the value is removed.  Returns the list of pieces.
    """
    # The argument might be a Header, so stringify it.  The leading ';'
    # lets the loop treat the first piece like every other one.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped double quotes before the candidate
        # semicolon means it sits inside a quoted string: try the next one.
        while cut > 0:
            quotes = remaining.count('"', 0, cut)
            escaped = remaining.count('\\"', 0, cut)
            if (quotes - escaped) % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece = remaining[:cut]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        remaining = remaining[cut:]
    return pieces
92
93
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    Unlike utils.collapse_rfc2231_value() this does not try to convert the
    value to a unicode string: a (charset, language, value) tuple comes
    back as a tuple with only its last item unquoted.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)
103
104
105
106class Message:
107 """Basic message object.
108
109 A message object is defined as something that has a bunch of RFC 2822
110 headers and a payload. It may optionally have an envelope header
111 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
112 multipart or a message/rfc822), then the payload is a list of Message
113 objects, otherwise it is a string.
114
115 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000116 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000117 do in fact appear multiple times (e.g. Received) and for those headers,
118 you must use the explicit API to set or get all the headers. Not all of
119 the mapping methods are implemented.
120 """
def __init__(self, policy=compat32):
    """Create an empty message governed by the given policy."""
    self.policy = policy
    # Header storage, envelope line, body and body charset.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages.
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type.
    self._default_type = 'text/plain'
132
def __str__(self):
    """Return the whole message flattened to a string.

    This is simply as_string() called with its default arguments.
    """
    text = self.as_string()
    return text
138
def as_string(self, unixfrom=False, maxheaderlen=0):
    """Flatten the message, headers and body, into a single string.

    When unixfrom is true the Unix From_ envelope header is included.
    maxheaderlen is handed to the Generator unchanged.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    buffer = StringIO()
    generator = Generator(buffer, mangle_from_=False,
                          maxheaderlen=maxheaderlen)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
153
def is_multipart(self):
    """Return True when the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
157
158 #
159 # Unix From_ line
160 #
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line of the message."""
    self._unixfrom = unixfrom
163
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None when unset)."""
    return self._unixfrom
166
167 #
168 # Payload manipulation.
169 #
def attach(self, payload):
    """Append the given payload to the message's current payload.

    A message without a payload gets a new one-element list; otherwise the
    payload is appended to the existing payload object.  To set the
    payload to a scalar object, use set_payload() instead.
    """
    current = self._payload
    if current is not None:
        current.append(payload)
    else:
        self._payload = [payload]
181
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable' or `base64'.  If
    some other encoding is used, or the header is missing, the payload is
    returned as bytes without further decoding.  Uuencoded payloads that
    cannot be decoded are returned undecoded, and a payload that is
    neither a string nor bytes (for instance None) is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given but the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    if not isinstance(payload, (str, bytes)):
        # Nothing that can be turned into bytes (e.g. no payload at all):
        # hand it back untouched instead of failing on an unbound name in
        # the decoding branches below.
        return payload
    if isinstance(payload, bytes):
        bpayload = payload
    elif utils._has_surrogates(payload):
        try:
            bpayload = payload.encode('ascii', 'surrogateescape')
        except UnicodeEncodeError:
            # Surrogates mixed with other non-ASCII characters cannot be
            # mapped back to the original bytes.
            if not decode:
                return payload
            bpayload = payload.encode('raw-unicode-escape')
        else:
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    # Unknown charset name: fall back to ASCII.
                    payload = bpayload.decode('ascii', 'replace')
    elif decode:
        try:
            bpayload = payload.encode('ascii')
        except UnicodeError:
            # This won't happen for RFC compliant messages (messages
            # containing only ASCII codepoints in the unicode input).
            # If it does happen, turn the string into bytes in a way
            # guaranteed not to fail.
            bpayload = payload.encode('raw-unicode-escape')
    if not decode:
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    # Unknown or missing encoding: return the raw bytes.
    return bpayload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000271
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    A bytes payload is stored as a string, decoded as ASCII with the
    surrogateescape error handler.  When charset is not None it is passed
    on to set_charset(); see that method for details.
    """
    self._payload = (payload.decode('ascii', 'surrogateescape')
                     if isinstance(payload, bytes) else payload)
    if charset is None:
        return
    self.set_charset(charset)
283
def set_charset(self, charset):
    """Set the character set of the payload.

    charset may be a Charset instance, a string naming a character set
    (which is converted to a Charset instance), or None.  Passing None
    removes the charset parameter from the Content-Type header and clears
    the stored charset.

    Otherwise MIME-Version, Content-Type and Content-Transfer-Encoding
    headers are added as needed, and the payload is body-encoded when the
    charset differs from its output charset or when the body encoding is
    not a callable encoder.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    output_charset = charset.get_output_charset()
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', output_charset)
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=output_charset)
    if charset != output_charset:
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    cte = charset.get_body_encoding()
    try:
        # A callable body encoding is applied to the message directly.
        cte(self)
    except TypeError:
        # Not callable: encode the body here and record the encoding.
        self._payload = charset.body_encode(self._payload)
        self.add_header('Content-Transfer-Encoding', cte)
321
def get_charset(self):
    """Return the Charset instance stored for the message's payload.

    The value is None when no charset has been set.
    """
    return self._charset
326
327 #
328 # MAPPING INTERFACE (partial)
329 #
def __len__(self):
    """Return how many headers the message holds, duplicates included."""
    headers = self._headers
    return len(headers)
333
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception.  When the
    header appears several times, which occurrence is returned is
    undefined; use get_all() to obtain every value for a field name.
    """
    value = self.get(name)
    return value
344
def __setitem__(self, name, val):
    """Append a header to the message.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError when the policy limits how often the header may
    occur and that limit has already been reached.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        matching = (key for key, _ in self._headers
                    if key.lower() == wanted)
        # Count the existing occurrences one by one, stopping as soon as
        # the limit is hit.
        for seen, _ in enumerate(matching, start=1):
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000362
def __delitem__(self, name):
    """Remove every header whose field name matches, ignoring case.

    Nothing is raised when no such header exists.
    """
    target = name.lower()
    self._headers = [(key, value) for key, value in self._headers
                     if key.lower() != target]
374
def __contains__(self, name):
    """Return True if a header with this field name exists (any case)."""
    wanted = name.lower()
    return any(key.lower() == wanted for key, _ in self._headers)
377
def __iter__(self):
    """Yield the field name of every header, in stored order."""
    yield from (field for field, _ in self._headers)
381
def keys(self):
    """Return a list of all the message's header field names.

    The names are in the order the headers are stored: the order in which
    they appeared in the original message or were added to it.  The list
    may contain duplicates, and a field that was deleted and re-inserted
    always ends up at the end.
    """
    return [field for field, _value in self._headers]
391
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse()
    first.  The values are in the order the headers are stored and may
    contain duplicates; a field that was deleted and re-inserted always
    ends up at the end.
    """
    return [self.policy.header_fetch_parse(field, raw)
            for field, raw in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000402
def items(self):
    """Return a list of (field name, value) pairs for every header.

    Each stored value is passed through the policy's header_fetch_parse()
    first.  The pairs are in the order the headers are stored and may
    contain duplicates; a field that was deleted and re-inserted always
    ends up at the end.
    """
    return [(field, self.policy.header_fetch_parse(field, raw))
            for field, raw in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000413
def get(self, name, failobj=None):
    """Return the value of the first header matching name, ignoring case.

    Works like __getitem__() except that failobj, rather than None, is
    returned when the field is missing.
    """
    wanted = name.lower()
    match = next(((field, raw) for field, raw in self._headers
                  if field.lower() == wanted), None)
    if match is None:
        return failobj
    field, raw = match
    return self.policy.header_fetch_parse(field, raw)
425
426 #
R David Murrayc27e5222012-05-25 15:01:48 -0400427 # "Internal" methods (public API, but only intended for use by a parser
428 # or generator, not normal application code.
429 #
430
def set_raw(self, name, value):
    """Append a (name, value) header pair exactly as given.

    This is an "internal" API, intended only for use by a parser.
    """
    pair = (name, value)
    self._headers.append(pair)
437
def raw_items(self):
    """Return an iterator over the unmodified (name, value) header pairs.

    The iterator runs over a copy of the header list.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
444
445 #
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000446 # Additional useful stuff
447 #
448
def get_all(self, name, failobj=None):
    """Return a list with the value of every header matching name.

    Matching ignores case.  The values are in the order the headers are
    stored and may contain duplicates; a field that was deleted and
    re-inserted always ends up at the end.

    When no header matches, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    found = [self.policy.header_fetch_parse(field, raw)
             for field, raw in self._headers
             if field.lower() == wanted]
    return found if found else failobj
466
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out
    when None).  Keyword arguments become additional parameters of the
    header, with underscores in their names converted to dashes.  A
    parameter is normally added as key="value"; when its value is None
    only the key is added.  A value containing non-ASCII characters can be
    given as a three-tuple (charset, language, value), in which case it is
    encoded according to RFC 2231 rules.  Otherwise it is encoded using
    the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        pieces.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000496
def replace_header(self, _name, _value):
    """Replace the value of the first header matching _name.

    Matching ignores case.  The header keeps its position in the header
    list and its original field-name spelling.  Raises KeyError (carrying
    the lower-cased name) when no header matches.
    """
    wanted = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(
                field, _value)
            return
    raise KeyError(wanted)
511
512 #
513 # Use these three methods instead of the three above.
514 #
515
def get_content_type(self):
    """Return the message's content type as lower-case `maintype/subtype'.

    Without a Content-Type header the value of get_default_type() is
    returned.  A header value that does not contain exactly one slash is
    treated as invalid and reported as text/plain, so a value is always
    returned.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    absent = object()
    header = self.get('content-type', absent)
    if header is absent:
        # This should have no parameters
        return self.get_default_type()
    ctype = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
539
def get_content_maintype(self):
    """Return the message's main content type.

    This is the part before the slash in the string returned by
    get_content_type().
    """
    maintype, *_rest = self.get_content_type().split('/')
    return maintype
548
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the part after the slash in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
557
def get_default_type(self):
    """Return the `default' content type stored on the message.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default_type = self._default_type
    return default_type
566
def set_default_type(self, ctype):
    """Store the `default' content type of the message.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    self._default_type = ctype
575
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as (name, value) pairs.

    Like get_params() but preserves the quoting of values.  failobj is
    returned when the header is missing.  A bare attribute (one without
    an `=' sign) is paired with the empty string before the list is
    handed to utils.decode_params().
    """
    # BAW: should this be part of the public interface?
    absent = object()
    raw = self.get(header, absent)
    if raw is absent:
        return failobj
    pairs = []
    for item in _parseparam(raw):
        key, equals, val = item.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((item.strip(), ''))
    return utils.decode_params(pairs)
596
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if the header is missing.
    Optional header is the header to search instead of Content-Type.  If
    unquote is True, the values are unquoted.
    """
    absent = object()
    pairs = self._get_params_preserve(absent, header)
    if pairs is absent:
        return failobj
    if not unquote:
        return pairs
    return [(key, _unquotevalue(val)) for key, val in pairs]
618
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the value of one parameter of the Content-Type header.

    Optional failobj is the object to return if the header is missing or
    has no such parameter.  Optional header is the header to search
    instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
652
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to
    RFC 2231.  Optional language specifies the RFC 2231 language,
    defaulting to the empty string.  Both charset and language should be
    strings.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and header not in self:
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already present: rebuild the whole value,
        # substituting the new parameter in place of the old one.
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_param, old_value, requote)
            ctype = SEMISPACE.join([ctype, piece]) if ctype else piece
    else:
        # New parameter: tack it onto whatever value is there.
        formatted = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([ctype, formatted]) if ctype else formatted

    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
700
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rewritten from its remaining parameters, leaving out the
    named one and its value.  All values will be quoted as necessary
    unless requote is False.  Optional header specifies an alternative to
    the Content-Type header.  Nothing happens when the header is missing.
    """
    if header not in self:
        return
    rebuilt = ''
    for key, val in self.get_params(header=header, unquote=requote):
        if key.lower() == param.lower():
            continue
        piece = _formatparam(key, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
722
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    old_params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for key, val in old_params[1:]:
        self.set_param(key, val, header, requote)
754
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is taken from the `filename' parameter of the
    Content-Disposition header.  When that is unavailable the `name'
    parameter of the Content-Type header is used instead.  The value is
    collapsed with utils.collapse_rfc2231_value() and stripped of
    surrounding whitespace.  failobj is returned when neither parameter
    exists.
    """
    absent = object()
    sources = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for param, header in sources:
        found = self.get_param(param, absent, header)
        if found is not absent:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj
770
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is taken from the `boundary' parameter of the
    Content-Type header, collapsed with utils.collapse_rfc2231_value() and
    stripped of trailing whitespace.  failobj is returned when there is no
    such parameter.
    """
    absent = object()
    found = self.get_param('boundary', absent)
    if found is absent:
        return failobj
    collapsed = utils.collapse_rfc2231_value(found)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()
783
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and re-adding it with
    add_header(), this rewrites the header where it already sits, so the
    position of Content-Type among the message's headers is preserved.
    An existing boundary parameter is replaced in place; if there is
    none, one is appended after the other parameters.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')

    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for key, value in old_params:
        if key.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((key, value))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))

    # Rebuild the header list, swapping in the new Content-Type value
    # wherever a Content-Type header occurs.
    rebuilt = []
    for name, current in self._headers:
        if name.lower() != 'content-type':
            rebuilt.append((name, current))
            continue
        # Parameters with an empty value are emitted as a bare key.
        pieces = [
            key if value == '' else '%s=%s' % (key, value)
            for key, value in updated
        ]
        joined = SEMISPACE.join(pieces)
        rebuilt.append(self.policy.header_store_parse(name, joined))
    self._headers = rebuilt
829
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The result is always lower-cased.  failobj is returned when the
    charset parameter cannot be found, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = value[0] or 'us-ascii'
        try:
            # LookupError will be raised if the codec isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in that codec.
            raw = value[2].encode('raw-unicode-escape')
            value = raw.decode(codec)
        except (LookupError, UnicodeError):
            # Fall back to the undecoded text; value is still the tuple.
            value = value[2]
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
859
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The message is traversed with walk(), and get_content_charset() is
    called on every part visited, so the list holds one entry for the
    container message (i.e. self) followed by one for each subpart.  A
    non-multipart message therefore yields a list of length 1.

    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset string, or failobj (None by default).
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
877
878 # I.e. def walk(self): ...
879 from email.iterators import walk