blob: 28835d09d95276678e77384ec140ad7871ff0dc9 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
R. David Murray92532142011-01-07 23:25:30 +000019from email import header
20from email import charset as _charset
21Charset = _charset.Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000022
23SEMISPACE = '; '
24
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000027tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
R. David Murray96fd54e2010-10-08 15:55:28 +000029# How to figure out if we are processing strings that come from a byte
30# source with undecodable characters.
31_has_surrogates = re.compile(
32 '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
33
Guido van Rossum8b3febe2007-08-30 01:15:14 +000034
Guido van Rossum8b3febe2007-08-30 01:15:14 +000035# Helper functions
R. David Murray92532142011-01-07 23:25:30 +000036def _sanitize_header(name, value):
37 # If the header value contains surrogates, return a Header using
38 # the unknown-8bit charset to encode the bytes as encoded words.
R. David Murray96fd54e2010-10-08 15:55:28 +000039 if not isinstance(value, str):
R. David Murray92532142011-01-07 23:25:30 +000040 # Assume it is already a header object
R. David Murray96fd54e2010-10-08 15:55:28 +000041 return value
42 if _has_surrogates(value):
R. David Murray92532142011-01-07 23:25:30 +000043 return header.Header(value, charset=_charset.UNKNOWN8BIT,
44 header_name=name)
R. David Murray96fd54e2010-10-08 15:55:28 +000045 else:
46 return value
47
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000048def _splitparam(param):
49 # Split header parameters. BAW: this may be too simple. It isn't
50 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
51 # found in the wild. We may eventually need a full fledged parser
52 # eventually.
53 a, sep, b = param.partition(';')
54 if not sep:
55 return a.strip(), None
56 return a.strip(), b.strip()
57
Guido van Rossum8b3febe2007-08-30 01:15:14 +000058def _formatparam(param, value=None, quote=True):
59 """Convenience function to format and return a key=value pair.
60
R. David Murray7ec754b2010-12-13 23:51:19 +000061 This will quote the value if needed or if quote is true. If value is a
62 three tuple (charset, language, value), it will be encoded according
63 to RFC2231 rules. If it contains non-ascii characters it will likewise
64 be encoded according to RFC2231 rules, using the utf-8 charset and
65 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000066 """
67 if value is not None and len(value) > 0:
68 # A tuple is used for RFC 2231 encoded parameter values where items
69 # are (charset, language, value). charset is a string, not a Charset
R. David Murraydfd7eb02010-12-24 22:36:49 +000070 # instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000071 if isinstance(value, tuple):
72 # Encode as per RFC 2231
73 param += '*'
74 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murraydfd7eb02010-12-24 22:36:49 +000075 return '%s=%s' % (param, value)
R. David Murray7ec754b2010-12-13 23:51:19 +000076 else:
77 try:
78 value.encode('ascii')
79 except UnicodeEncodeError:
80 param += '*'
81 value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murraydfd7eb02010-12-24 22:36:49 +000082 return '%s=%s' % (param, value)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000083 # BAW: Please check this. I think that if quote is set it should
84 # force quoting even if not necessary.
85 if quote or tspecials.search(value):
86 return '%s="%s"' % (param, utils.quote(value))
87 else:
88 return '%s=%s' % (param, value)
89 else:
90 return param
91
92def _parseparam(s):
93 plist = []
94 while s[:1] == ';':
95 s = s[1:]
96 end = s.find(';')
R. David Murrayd48739f2010-04-14 18:59:18 +000097 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000098 end = s.find(';', end + 1)
99 if end < 0:
100 end = len(s)
101 f = s[:end]
102 if '=' in f:
103 i = f.index('=')
104 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
105 plist.append(f.strip())
106 s = s[end:]
107 return plist
108
109
110def _unquotevalue(value):
111 # This is different than utils.collapse_rfc2231_value() because it doesn't
112 # try to convert the value to a unicode. Message.get_param() and
113 # Message.get_params() are both currently defined to return the tuple in
114 # the face of RFC 2231 parameters.
115 if isinstance(value, tuple):
116 return value[0], value[1], utils.unquote(value[2])
117 else:
118 return utils.unquote(value)
119
120
121
122class Message:
123 """Basic message object.
124
125 A message object is defined as something that has a bunch of RFC 2822
126 headers and a payload. It may optionally have an envelope header
127 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
128 multipart or a message/rfc822), then the payload is a list of Message
129 objects, otherwise it is a string.
130
131 Message objects implement part of the `mapping' interface, which assumes
R. David Murrayd2c310f2010-10-01 02:08:02 +0000132 there is exactly one occurrence of the header per message. Some headers
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000133 do in fact appear multiple times (e.g. Received) and for those headers,
134 you must use the explicit API to set or get all the headers. Not all of
135 the mapping methods are implemented.
136 """
137 def __init__(self):
138 self._headers = []
139 self._unixfrom = None
140 self._payload = None
141 self._charset = None
142 # Defaults for multipart messages
143 self.preamble = self.epilogue = None
144 self.defects = []
145 # Default content type
146 self._default_type = 'text/plain'
147
148 def __str__(self):
149 """Return the entire formatted message as a string.
150 This includes the headers, body, and envelope header.
151 """
152 return self.as_string()
153
154 def as_string(self, unixfrom=False, maxheaderlen=0):
155 """Return the entire formatted message as a string.
156 Optional `unixfrom' when True, means include the Unix From_ envelope
157 header.
158
159 This is a convenience method and may not generate the message exactly
160 as you intend because by default it mangles lines that begin with
161 "From ". For more flexibility, use the flatten() method of a
162 Generator instance.
163 """
164 from email.generator import Generator
165 fp = StringIO()
166 g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
167 g.flatten(self, unixfrom=unixfrom)
168 return fp.getvalue()
169
170 def is_multipart(self):
171 """Return True if the message consists of multiple parts."""
172 return isinstance(self._payload, list)
173
174 #
175 # Unix From_ line
176 #
177 def set_unixfrom(self, unixfrom):
178 self._unixfrom = unixfrom
179
180 def get_unixfrom(self):
181 return self._unixfrom
182
183 #
184 # Payload manipulation.
185 #
186 def attach(self, payload):
187 """Add the given payload to the current payload.
188
189 The current payload will always be a list of objects after this method
190 is called. If you want to set the payload to a scalar object, use
191 set_payload() instead.
192 """
193 if self._payload is None:
194 self._payload = [payload]
195 else:
196 self._payload.append(payload)
197
198 def get_payload(self, i=None, decode=False):
199 """Return a reference to the payload.
200
201 The payload will either be a list object or a string. If you mutate
202 the list object, you modify the message's payload in place. Optional
203 i returns that index into the payload.
204
205 Optional decode is a flag indicating whether the payload should be
206 decoded or not, according to the Content-Transfer-Encoding header
207 (default is False).
208
209 When True and the message is not a multipart, the payload will be
210 decoded if this header's value is `quoted-printable' or `base64'. If
211 some other encoding is used, or the header is missing, or if the
212 payload has bogus data (i.e. bogus base64 or uuencoded data), the
213 payload is returned as-is.
214
215 If the message is a multipart and the decode flag is True, then None
216 is returned.
217 """
R. David Murray96fd54e2010-10-08 15:55:28 +0000218 # Here is the logic table for this code, based on the email5.0.0 code:
219 # i decode is_multipart result
220 # ------ ------ ------------ ------------------------------
221 # None True True None
222 # i True True None
223 # None False True _payload (a list)
224 # i False True _payload element i (a Message)
225 # i False False error (not a list)
226 # i True False error (not a list)
227 # None False False _payload
228 # None True False _payload decoded (bytes)
229 # Note that Barry planned to factor out the 'decode' case, but that
230 # isn't so easy now that we handle the 8 bit data, which needs to be
231 # converted in both the decode and non-decode path.
232 if self.is_multipart():
233 if decode:
234 return None
235 if i is None:
236 return self._payload
237 else:
238 return self._payload[i]
239 # For backward compatibility, Use isinstance and this error message
240 # instead of the more logical is_multipart test.
241 if i is not None and not isinstance(self._payload, list):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000242 raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray96fd54e2010-10-08 15:55:28 +0000243 payload = self._payload
244 cte = self.get('content-transfer-encoding', '').lower()
R David Murray106f8e32011-03-15 12:48:41 -0400245 # payload may be bytes here.
R. David Murray96fd54e2010-10-08 15:55:28 +0000246 if isinstance(payload, str):
247 if _has_surrogates(payload):
248 bpayload = payload.encode('ascii', 'surrogateescape')
249 if not decode:
250 try:
251 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
252 except LookupError:
253 payload = bpayload.decode('ascii', 'replace')
254 elif decode:
255 try:
256 bpayload = payload.encode('ascii')
257 except UnicodeError:
258 # This won't happen for RFC compliant messages (messages
259 # containing only ASCII codepoints in the unicode input).
260 # If it does happen, turn the string into bytes in a way
261 # guaranteed not to fail.
262 bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000263 if not decode:
264 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000265 if cte == 'quoted-printable':
R. David Murray96fd54e2010-10-08 15:55:28 +0000266 return utils._qdecode(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000267 elif cte == 'base64':
268 try:
R. David Murray96fd54e2010-10-08 15:55:28 +0000269 return base64.b64decode(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000270 except binascii.Error:
271 # Incorrect padding
R. David Murray96fd54e2010-10-08 15:55:28 +0000272 return bpayload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000273 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray96fd54e2010-10-08 15:55:28 +0000274 in_file = BytesIO(bpayload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000275 out_file = BytesIO()
276 try:
277 uu.decode(in_file, out_file, quiet=True)
278 return out_file.getvalue()
279 except uu.Error:
280 # Some decoding problem
R. David Murray96fd54e2010-10-08 15:55:28 +0000281 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000282 if isinstance(payload, str):
R. David Murray96fd54e2010-10-08 15:55:28 +0000283 return bpayload
Barry Warsaw8b2af272007-08-31 03:04:26 +0000284 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000285
286 def set_payload(self, payload, charset=None):
287 """Set the payload to the given value.
288
289 Optional charset sets the message's default character set. See
290 set_charset() for details.
291 """
292 self._payload = payload
293 if charset is not None:
294 self.set_charset(charset)
295
296 def set_charset(self, charset):
297 """Set the charset of the payload to a given character set.
298
299 charset can be a Charset instance, a string naming a character set, or
300 None. If it is a string it will be converted to a Charset instance.
301 If charset is None, the charset parameter will be removed from the
302 Content-Type field. Anything else will generate a TypeError.
303
304 The message will be assumed to be of type text/* encoded with
305 charset.input_charset. It will be converted to charset.output_charset
306 and encoded properly, if needed, when generating the plain text
307 representation of the message. MIME headers (MIME-Version,
308 Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000309 """
310 if charset is None:
311 self.del_param('charset')
312 self._charset = None
313 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000314 if not isinstance(charset, Charset):
315 charset = Charset(charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000316 self._charset = charset
317 if 'MIME-Version' not in self:
318 self.add_header('MIME-Version', '1.0')
319 if 'Content-Type' not in self:
320 self.add_header('Content-Type', 'text/plain',
321 charset=charset.get_output_charset())
322 else:
323 self.set_param('charset', charset.get_output_charset())
Guido van Rossum9604e662007-08-30 03:46:43 +0000324 if charset != charset.get_output_charset():
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000325 self._payload = charset.body_encode(self._payload)
326 if 'Content-Transfer-Encoding' not in self:
327 cte = charset.get_body_encoding()
328 try:
329 cte(self)
330 except TypeError:
331 self._payload = charset.body_encode(self._payload)
332 self.add_header('Content-Transfer-Encoding', cte)
333
334 def get_charset(self):
335 """Return the Charset instance associated with the message's payload.
336 """
337 return self._charset
338
339 #
340 # MAPPING INTERFACE (partial)
341 #
342 def __len__(self):
343 """Return the total number of headers, including duplicates."""
344 return len(self._headers)
345
346 def __getitem__(self, name):
347 """Get a header value.
348
349 Return None if the header is missing instead of raising an exception.
350
351 Note that if the header appeared multiple times, exactly which
R. David Murrayd2c310f2010-10-01 02:08:02 +0000352 occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000353 the values matching a header field name.
354 """
355 return self.get(name)
356
357 def __setitem__(self, name, val):
358 """Set the value of a header.
359
360 Note: this does not overwrite an existing header with the same field
361 name. Use __delitem__() first to delete any existing headers.
362 """
363 self._headers.append((name, val))
364
365 def __delitem__(self, name):
366 """Delete all occurrences of a header, if present.
367
368 Does not raise an exception if the header is missing.
369 """
370 name = name.lower()
371 newheaders = []
372 for k, v in self._headers:
373 if k.lower() != name:
374 newheaders.append((k, v))
375 self._headers = newheaders
376
377 def __contains__(self, name):
378 return name.lower() in [k.lower() for k, v in self._headers]
379
380 def __iter__(self):
381 for field, value in self._headers:
382 yield field
383
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000384 def keys(self):
385 """Return a list of all the message's header field names.
386
387 These will be sorted in the order they appeared in the original
388 message, or were added to the message, and may contain duplicates.
389 Any fields deleted and re-inserted are always appended to the header
390 list.
391 """
392 return [k for k, v in self._headers]
393
394 def values(self):
395 """Return a list of all the message's header values.
396
397 These will be sorted in the order they appeared in the original
398 message, or were added to the message, and may contain duplicates.
399 Any fields deleted and re-inserted are always appended to the header
400 list.
401 """
R. David Murray92532142011-01-07 23:25:30 +0000402 return [_sanitize_header(k, v) for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000403
404 def items(self):
405 """Get all the message's header fields and values.
406
407 These will be sorted in the order they appeared in the original
408 message, or were added to the message, and may contain duplicates.
409 Any fields deleted and re-inserted are always appended to the header
410 list.
411 """
R. David Murray92532142011-01-07 23:25:30 +0000412 return [(k, _sanitize_header(k, v)) for k, v in self._headers]
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000413
414 def get(self, name, failobj=None):
415 """Get a header value.
416
417 Like __getitem__() but return failobj instead of None when the field
418 is missing.
419 """
420 name = name.lower()
421 for k, v in self._headers:
422 if k.lower() == name:
R. David Murray92532142011-01-07 23:25:30 +0000423 return _sanitize_header(k, v)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000424 return failobj
425
426 #
427 # Additional useful stuff
428 #
429
430 def get_all(self, name, failobj=None):
431 """Return a list of all the values for the named field.
432
433 These will be sorted in the order they appeared in the original
434 message, and may contain duplicates. Any fields deleted and
435 re-inserted are always appended to the header list.
436
437 If no such fields exist, failobj is returned (defaults to None).
438 """
439 values = []
440 name = name.lower()
441 for k, v in self._headers:
442 if k.lower() == name:
R. David Murray92532142011-01-07 23:25:30 +0000443 values.append(_sanitize_header(k, v))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000444 if not values:
445 return failobj
446 return values
447
448 def add_header(self, _name, _value, **_params):
449 """Extended header setting.
450
451 name is the header field to add. keyword arguments can be used to set
452 additional parameters for the header field, with underscores converted
453 to dashes. Normally the parameter will be added as key="value" unless
R. David Murray7ec754b2010-12-13 23:51:19 +0000454 value is None, in which case only the key will be added. If a
455 parameter value contains non-ASCII characters it can be specified as a
456 three-tuple of (charset, language, value), in which case it will be
457 encoded according to RFC2231 rules. Otherwise it will be encoded using
458 the utf-8 charset and a language of ''.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000459
R. David Murray7ec754b2010-12-13 23:51:19 +0000460 Examples:
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000461
462 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
R. David Murray7ec754b2010-12-13 23:51:19 +0000463 msg.add_header('content-disposition', 'attachment',
464 filename=('utf-8', '', Fußballer.ppt'))
465 msg.add_header('content-disposition', 'attachment',
466 filename='Fußballer.ppt'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000467 """
468 parts = []
469 for k, v in _params.items():
470 if v is None:
471 parts.append(k.replace('_', '-'))
472 else:
473 parts.append(_formatparam(k.replace('_', '-'), v))
474 if _value is not None:
475 parts.insert(0, _value)
476 self._headers.append((_name, SEMISPACE.join(parts)))
477
478 def replace_header(self, _name, _value):
479 """Replace a header.
480
481 Replace the first matching header found in the message, retaining
482 header order and case. If no matching header was found, a KeyError is
483 raised.
484 """
485 _name = _name.lower()
486 for i, (k, v) in zip(range(len(self._headers)), self._headers):
487 if k.lower() == _name:
488 self._headers[i] = (k, _value)
489 break
490 else:
491 raise KeyError(_name)
492
493 #
494 # Use these three methods instead of the three above.
495 #
496
497 def get_content_type(self):
498 """Return the message's content type.
499
500 The returned string is coerced to lower case of the form
501 `maintype/subtype'. If there was no Content-Type header in the
502 message, the default type as given by get_default_type() will be
503 returned. Since according to RFC 2045, messages always have a default
504 type this will always return a value.
505
506 RFC 2045 defines a message's default type to be text/plain unless it
507 appears inside a multipart/digest container, in which case it would be
508 message/rfc822.
509 """
510 missing = object()
511 value = self.get('content-type', missing)
512 if value is missing:
513 # This should have no parameters
514 return self.get_default_type()
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000515 ctype = _splitparam(value)[0].lower()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000516 # RFC 2045, section 5.2 says if its invalid, use text/plain
517 if ctype.count('/') != 1:
518 return 'text/plain'
519 return ctype
520
521 def get_content_maintype(self):
522 """Return the message's main content type.
523
524 This is the `maintype' part of the string returned by
525 get_content_type().
526 """
527 ctype = self.get_content_type()
528 return ctype.split('/')[0]
529
530 def get_content_subtype(self):
531 """Returns the message's sub-content type.
532
533 This is the `subtype' part of the string returned by
534 get_content_type().
535 """
536 ctype = self.get_content_type()
537 return ctype.split('/')[1]
538
539 def get_default_type(self):
540 """Return the `default' content type.
541
542 Most messages have a default content type of text/plain, except for
543 messages that are subparts of multipart/digest containers. Such
544 subparts have a default content type of message/rfc822.
545 """
546 return self._default_type
547
548 def set_default_type(self, ctype):
549 """Set the `default' content type.
550
551 ctype should be either "text/plain" or "message/rfc822", although this
552 is not enforced. The default content type is not stored in the
553 Content-Type header.
554 """
555 self._default_type = ctype
556
557 def _get_params_preserve(self, failobj, header):
558 # Like get_params() but preserves the quoting of values. BAW:
559 # should this be part of the public interface?
560 missing = object()
561 value = self.get(header, missing)
562 if value is missing:
563 return failobj
564 params = []
565 for p in _parseparam(';' + value):
566 try:
567 name, val = p.split('=', 1)
568 name = name.strip()
569 val = val.strip()
570 except ValueError:
571 # Must have been a bare attribute
572 name = p.strip()
573 val = ''
574 params.append((name, val))
575 params = utils.decode_params(params)
576 return params
577
578 def get_params(self, failobj=None, header='content-type', unquote=True):
579 """Return the message's Content-Type parameters, as a list.
580
581 The elements of the returned list are 2-tuples of key/value pairs, as
582 split on the `=' sign. The left hand side of the `=' is the key,
583 while the right hand side is the value. If there is no `=' sign in
584 the parameter the value is the empty string. The value is as
585 described in the get_param() method.
586
587 Optional failobj is the object to return if there is no Content-Type
588 header. Optional header is the header to search instead of
589 Content-Type. If unquote is True, the value is unquoted.
590 """
591 missing = object()
592 params = self._get_params_preserve(missing, header)
593 if params is missing:
594 return failobj
595 if unquote:
596 return [(k, _unquotevalue(v)) for k, v in params]
597 else:
598 return params
599
600 def get_param(self, param, failobj=None, header='content-type',
601 unquote=True):
602 """Return the parameter value if found in the Content-Type header.
603
604 Optional failobj is the object to return if there is no Content-Type
605 header, or the Content-Type header has no such parameter. Optional
606 header is the header to search instead of Content-Type.
607
608 Parameter keys are always compared case insensitively. The return
609 value can either be a string, or a 3-tuple if the parameter was RFC
610 2231 encoded. When it's a 3-tuple, the elements of the value are of
611 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
612 LANGUAGE can be None, in which case you should consider VALUE to be
613 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
614
615 Your application should be prepared to deal with 3-tuple return
616 values, and can convert the parameter to a Unicode string like so:
617
618 param = msg.get_param('foo')
619 if isinstance(param, tuple):
620 param = unicode(param[2], param[0] or 'us-ascii')
621
622 In any case, the parameter value (either the returned string, or the
623 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
624 to False.
625 """
626 if header not in self:
627 return failobj
628 for k, v in self._get_params_preserve(failobj, header):
629 if k.lower() == param.lower():
630 if unquote:
631 return _unquotevalue(v)
632 else:
633 return v
634 return failobj
635
636 def set_param(self, param, value, header='Content-Type', requote=True,
637 charset=None, language=''):
638 """Set a parameter in the Content-Type header.
639
640 If the parameter already exists in the header, its value will be
641 replaced with the new value.
642
643 If header is Content-Type and has not yet been defined for this
644 message, it will be set to "text/plain" and the new parameter and
645 value will be appended as per RFC 2045.
646
647 An alternate header can specified in the header argument, and all
648 parameters will be quoted as necessary unless requote is False.
649
650 If charset is specified, the parameter will be encoded according to RFC
651 2231. Optional language specifies the RFC 2231 language, defaulting
652 to the empty string. Both charset and language should be strings.
653 """
654 if not isinstance(value, tuple) and charset:
655 value = (charset, language, value)
656
657 if header not in self and header.lower() == 'content-type':
658 ctype = 'text/plain'
659 else:
660 ctype = self.get(header)
661 if not self.get_param(param, header=header):
662 if not ctype:
663 ctype = _formatparam(param, value, requote)
664 else:
665 ctype = SEMISPACE.join(
666 [ctype, _formatparam(param, value, requote)])
667 else:
668 ctype = ''
669 for old_param, old_value in self.get_params(header=header,
670 unquote=requote):
671 append_param = ''
672 if old_param.lower() == param.lower():
673 append_param = _formatparam(param, value, requote)
674 else:
675 append_param = _formatparam(old_param, old_value, requote)
676 if not ctype:
677 ctype = append_param
678 else:
679 ctype = SEMISPACE.join([ctype, append_param])
680 if ctype != self.get(header):
681 del self[header]
682 self[header] = ctype
683
684 def del_param(self, param, header='content-type', requote=True):
685 """Remove the given parameter completely from the Content-Type header.
686
687 The header will be re-written in place without the parameter or its
688 value. All values will be quoted as necessary unless requote is
689 False. Optional header specifies an alternative to the Content-Type
690 header.
691 """
692 if header not in self:
693 return
694 new_ctype = ''
695 for p, v in self.get_params(header=header, unquote=requote):
696 if p.lower() != param.lower():
697 if not new_ctype:
698 new_ctype = _formatparam(p, v, requote)
699 else:
700 new_ctype = SEMISPACE.join([new_ctype,
701 _formatparam(p, v, requote)])
702 if new_ctype != self.get(header):
703 del self[header]
704 self[header] = new_ctype
705
706 def set_type(self, type, header='Content-Type', requote=True):
707 """Set the main type and subtype for the Content-Type header.
708
709 type must be a string in the form "maintype/subtype", otherwise a
710 ValueError is raised.
711
712 This method replaces the Content-Type header, keeping all the
713 parameters in place. If requote is False, this leaves the existing
714 header's quoting as is. Otherwise, the parameters will be quoted (the
715 default).
716
717 An alternative header can be specified in the header argument. When
718 the Content-Type header is set, we'll always also add a MIME-Version
719 header.
720 """
721 # BAW: should we be strict?
722 if not type.count('/') == 1:
723 raise ValueError
724 # Set the Content-Type, you get a MIME-Version
725 if header.lower() == 'content-type':
726 del self['mime-version']
727 self['MIME-Version'] = '1.0'
728 if header not in self:
729 self[header] = type
730 return
731 params = self.get_params(header=header, unquote=requote)
732 del self[header]
733 self[header] = type
734 # Skip the first param; it's the old type.
735 for p, v in params[1:]:
736 self.set_param(p, v, header, requote)
737
738 def get_filename(self, failobj=None):
739 """Return the filename associated with the payload if present.
740
741 The filename is extracted from the Content-Disposition header's
742 `filename' parameter, and it is unquoted. If that header is missing
743 the `filename' parameter, this method falls back to looking for the
744 `name' parameter.
745 """
746 missing = object()
747 filename = self.get_param('filename', missing, 'content-disposition')
748 if filename is missing:
R. David Murraybf2e0aa2009-10-10 00:13:32 +0000749 filename = self.get_param('name', missing, 'content-type')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000750 if filename is missing:
751 return failobj
752 return utils.collapse_rfc2231_value(filename).strip()
753
754 def get_boundary(self, failobj=None):
755 """Return the boundary associated with the payload if present.
756
757 The boundary is extracted from the Content-Type header's `boundary'
758 parameter, and it is unquoted.
759 """
760 missing = object()
761 boundary = self.get_param('boundary', missing)
762 if boundary is missing:
763 return failobj
764 # RFC 2046 says that boundaries may begin but not end in w/s
765 return utils.collapse_rfc2231_value(boundary).rstrip()
766
767 def set_boundary(self, boundary):
768 """Set the boundary parameter in Content-Type to 'boundary'.
769
770 This is subtly different than deleting the Content-Type header and
771 adding a new one with a new boundary parameter via add_header(). The
772 main difference is that using the set_boundary() method preserves the
773 order of the Content-Type header in the original message.
774
775 HeaderParseError is raised if the message has no Content-Type header.
776 """
777 missing = object()
778 params = self._get_params_preserve(missing, 'content-type')
779 if params is missing:
780 # There was no Content-Type header, and we don't know what type
781 # to set it to, so raise an exception.
782 raise errors.HeaderParseError('No Content-Type header found')
783 newparams = []
784 foundp = False
785 for pk, pv in params:
786 if pk.lower() == 'boundary':
787 newparams.append(('boundary', '"%s"' % boundary))
788 foundp = True
789 else:
790 newparams.append((pk, pv))
791 if not foundp:
792 # The original Content-Type header had no boundary attribute.
793 # Tack one on the end. BAW: should we raise an exception
794 # instead???
795 newparams.append(('boundary', '"%s"' % boundary))
796 # Replace the existing Content-Type header with the new value
797 newheaders = []
798 for h, v in self._headers:
799 if h.lower() == 'content-type':
800 parts = []
801 for k, v in newparams:
802 if v == '':
803 parts.append(k)
804 else:
805 parts.append('%s=%s' % (k, v))
806 newheaders.append((h, SEMISPACE.join(parts)))
807
808 else:
809 newheaders.append((h, v))
810 self._headers = newheaders
811
812 def get_content_charset(self, failobj=None):
813 """Return the charset parameter of the Content-Type header.
814
815 The returned string is always coerced to lower case. If there is no
816 Content-Type header, or if that header has no charset parameter,
817 failobj is returned.
818 """
819 missing = object()
820 charset = self.get_param('charset', missing)
821 if charset is missing:
822 return failobj
823 if isinstance(charset, tuple):
824 # RFC 2231 encoded, so decode it, and it better end up as ascii.
825 pcharset = charset[0] or 'us-ascii'
826 try:
827 # LookupError will be raised if the charset isn't known to
828 # Python. UnicodeError will be raised if the encoded text
829 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000830 as_bytes = charset[2].encode('raw-unicode-escape')
831 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000832 except (LookupError, UnicodeError):
833 charset = charset[2]
834 # charset characters must be in us-ascii range
835 try:
836 charset.encode('us-ascii')
837 except UnicodeError:
838 return failobj
839 # RFC 2046, $4.1.2 says charsets are not case sensitive
840 return charset.lower()
841
842 def get_charsets(self, failobj=None):
843 """Return a list containing the charset(s) used in this message.
844
845 The returned list of items describes the Content-Type headers'
846 charset parameter for this message and all the subparts in its
847 payload.
848
849 Each item will either be a string (the value of the charset parameter
850 in the Content-Type header of that part) or the value of the
851 'failobj' parameter (defaults to None), if the part does not have a
852 main MIME type of "text", or the charset is not defined.
853
854 The list will contain one string for each part of the message, plus
855 one for the container message (i.e. self), so that a non-multipart
856 message will still return a list of length 1.
857 """
858 return [part.get_content_charset(failobj) for part in self.walk()]
859
860 # I.e. def walk(self): ...
861 from email.iterators import walk