blob: 3feab52799a43c52e54d9592f8fb9d1f06fc9758 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from io import BytesIO, StringIO
14
15# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from email import utils
17from email import errors
R David Murrayc27e5222012-05-25 15:01:48 -040018from email._policybase import compat32
R. David Murray92532142011-01-07 23:25:30 +000019from email import charset as _charset
R David Murray80e0aee2012-05-27 21:23:34 -040020from email._encoded_words import decode_b
R. David Murray92532142011-01-07 23:25:30 +000021Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
23SEMISPACE = '; '
24
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000027tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000030def _splitparam(param):
31 # Split header parameters. BAW: this may be too simple. It isn't
32 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murraya2150232011-03-16 21:11:23 -040033 # found in the wild. We may eventually need a full fledged parser.
34 # RDM: we might have a Header here; for now just stringify it.
35 a, sep, b = str(param).partition(';')
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000036 if not sep:
37 return a.strip(), None
38 return a.strip(), b.strip()
39
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040def _formatparam(param, value=None, quote=True):
41 """Convenience function to format and return a key=value pair.
42
R. David Murray7ec754b2010-12-13 23:51:19 +000043 This will quote the value if needed or if quote is true. If value is a
44 three tuple (charset, language, value), it will be encoded according
45 to RFC2231 rules. If it contains non-ascii characters it will likewise
46 be encoded according to RFC2231 rules, using the utf-8 charset and
47 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048 """
49 if value is not None and len(value) > 0:
50 # A tuple is used for RFC 2231 encoded parameter values where items
51 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000052 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000053 if isinstance(value, tuple):
54 # Encode as per RFC 2231
55 param += '*'
56 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000057 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000058 else:
59 try:
60 value.encode('ascii')
61 except UnicodeEncodeError:
62 param += '*'
63 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000064 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000065 # BAW: Please check this. I think that if quote is set it should
66 # force quoting even if not necessary.
67 if quote or tspecials.search(value):
68 return '%s="%s"' % (param, utils.quote(value))
69 else:
70 return '%s=%s' % (param, value)
71 else:
72 return param
73
74def _parseparam(s):
R David Murraya2150232011-03-16 21:11:23 -040075 # RDM This might be a Header, so for now stringify it.
76 s = ';' + str(s)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000077 plist = []
78 while s[:1] == ';':
79 s = s[1:]
80 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000081 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000082 end = s.find(';', end + 1)
83 if end < 0:
84 end = len(s)
85 f = s[:end]
86 if '=' in f:
87 i = f.index('=')
88 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
89 plist.append(f.strip())
90 s = s[end:]
91 return plist
92
93
94def _unquotevalue(value):
95 # This is different than utils.collapse_rfc2231_value() because it doesn't
96 # try to convert the value to a unicode. Message.get_param() and
97 # Message.get_params() are both currently defined to return the tuple in
98 # the face of RFC 2231 parameters.
99 if isinstance(value, tuple):
100 return value[0], value[1], utils.unquote(value[2])
101 else:
102 return utils.unquote(value)
103
104
105
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """

    def __init__(self, policy=compat32):
        """Create an empty message with no headers and no payload.

        policy is stored on the instance; its header_max_count(),
        header_store_parse(), header_fetch_parse() and handle_defect()
        methods are used by the header and payload methods below.
        """
        self.policy = policy
        # List of (name, value) pairs, in insertion order.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend.  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Store the envelope (Unix From_) header line."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the stored envelope header line (None by default)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.

        If the current payload is neither None nor a list, the append call
        below fails; attach() is only meaningful on empty or multipart
        messages.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or
        one of the uuencode names.  If some other encoding is used, or the
        header is missing, the payload is returned as bytes where it was a
        string and as-is otherwise.  A payload that is neither a string nor
        bytes (e.g. None) is always returned as-is.  Undecodable uuencoded
        data is returned undecoded, as bytes.

        If the message is a multipart and the decode flag is True, then None
        is returned.

        Raises TypeError if i is given and the message is not a multipart.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, Use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # cte might be a Header, so for now stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # payload may be bytes here.
        if isinstance(payload, str):
            if utils._has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(
                            self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        if not decode:
            return payload
        if isinstance(payload, bytes):
            # Already binary: decode it directly.  Without this, bpayload
            # would be unbound in the branches below.
            bpayload = payload
        elif not isinstance(payload, str):
            # None or some other object: there is nothing to decode.
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            # XXX: this is a bit of a hack; decode_b should probably be
            # factored out somewhere, but I haven't figured out where yet.
            value, defects = decode_b(b''.join(bpayload.splitlines()))
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        if isinstance(payload, str):
            return bpayload
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # If cte is callable, it is applied to the message itself.
                cte(self)
            except TypeError:
                # Otherwise encode the body and record cte as the header.
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.

        Raises ValueError if the policy limits the number of headers with
        this name and that limit has already been reached.
        """
        max_count = self.policy.header_max_count(name)
        if max_count:
            lname = name.lower()
            found = 0
            for k, v in self._headers:
                if k.lower() == lname:
                    found += 1
                    if found >= max_count:
                        raise ValueError("There may be at most {} {} headers "
                                         "in a message".format(max_count, name))
        self._headers.append(self.policy.header_store_parse(name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header named name exists (case-insensitive)."""
        lname = name.lower()
        return any(k.lower() == lname for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, duplicates included."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [self.policy.header_fetch_parse(k, v)
                for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [(k, self.policy.header_fetch_parse(k, v))
                for k, v in self._headers]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return self.policy.header_fetch_parse(k, v)
        return failobj

    #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
    #

    def set_raw(self, name, value):
        """Store name and value in the model without modification.

        This is an "internal" API, intended only for use by a parser.
        """
        self._headers.append((name, value))

    def raw_items(self):
        """Return the (name, value) header pairs without modification.

        The pairs are yielded from a copy of the header list.

        This is an "internal" API, intended only for use by a generator.
        """
        return iter(self._headers.copy())

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        values = []
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                values.append(self.policy.header_fetch_parse(k, v))
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self[_name] = SEMISPACE.join(parts)

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = self.policy.header_store_parse(k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Content type and header parameter handling.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        """Return the parameters of header as a list of (name, value) pairs.

        Like get_params() but preserves the quoting of values.  A bare
        attribute (no `=' sign) gets the empty string as its value.  failobj
        is returned when the header is missing.
        """
        # BAW: should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
        The parameter value (either the returned string, or the VALUE item in
        the 3-tuple) is always unquoted, unless unquote is set to False.

        If your application doesn't care whether the parameter was RFC 2231
        encoded, it can turn the return value into a string as follows:

            rawparam = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(rawparam)

        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter absent (or has a false value): append it.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header value, substituting the new value in place.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be of the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Replace the existing Content-Type header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                parts = []
                for pname, pvalue in newparams:
                    if pvalue == '':
                        parts.append(pname)
                    else:
                        parts.append('%s=%s' % (pname, pvalue))
                val = SEMISPACE.join(parts)
                newheaders.append(self.policy.header_store_parse(h, val))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        or if the charset contains non-ASCII characters, failobj is returned.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk