blob: ff262c7c8f5f925f1b4488a13d2c89f6fdc8fe43 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
Guido van Rossum9604e662007-08-30 03:46:43 +000019from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21SEMISPACE = '; '
22
# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
27paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
30tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
31
32
33
34# Helper functions
def _formatparam(param, value=None, quote=True):
    """Render a header parameter as `key=value', or as a bare `key'.

    When value is None or empty, only the parameter name is returned.  A
    tuple value is taken to be an RFC 2231 triple (charset, language, text):
    the name gets a trailing `*' and the text is encoded with
    utils.encode_rfc2231().  The value is wrapped in double quotes (after
    escaping via utils.quote()) when quote is true or when it contains a
    character matched by tspecials; otherwise it is emitted verbatim.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # RFC 2231 form; charset is a plain string here, not a Charset
        # instance.
        param += '*'
        charset, language, text = value[0], value[1], value[2]
        value = utils.encode_rfc2231(text, charset, language)
    # BAW: Please check this.  I think that if quote is set it should force
    # quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
56
def _parseparam(s):
    """Split a parameterized header value into its raw items.

    s is expected to start with `;' (callers prepend one to the header
    value).  The return value is a list of strings, one per `;'-separated
    item.  For items of the form key=value the key is lower-cased and
    whitespace around key and value is stripped; the value itself, including
    any surrounding quotes, is left untouched.

    A `;' that appears inside a double-quoted string does not terminate the
    item.  Backslash-escaped quotes (\\") inside such a string are not
    treated as string delimiters.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # An odd number of unescaped double quotes before `end' means the
        # semicolon sits inside a quoted string, so look for the next one.
        # Escaped quotes are subtracted because they do not open or close a
        # quoted string.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        item = s[:end]
        if '=' in item:
            key, _, val = item.partition('=')
            item = key.strip().lower() + '=' + val.strip()
        plist.append(item.strip())
        s = s[end:]
    return plist
73
74
def _unquotevalue(value):
    """Strip quoting from a parameter value without decoding it.

    Plain strings go straight through utils.unquote().  A tuple is treated
    as an RFC 2231 (charset, language, text) triple: only the text element
    is unquoted and a new 3-tuple is returned.  Unlike
    utils.collapse_rfc2231_value() this never converts the triple into a
    single string, because Message.get_param() and Message.get_params() are
    defined to hand the tuple back to the caller.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)
84
85
86
87class Message:
88 """Basic message object.
89
90 A message object is defined as something that has a bunch of RFC 2822
91 headers and a payload. It may optionally have an envelope header
92 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
93 multipart or a message/rfc822), then the payload is a list of Message
94 objects, otherwise it is a string.
95
    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
101 """
102 def __init__(self):
103 self._headers = []
104 self._unixfrom = None
105 self._payload = None
106 self._charset = None
107 # Defaults for multipart messages
108 self.preamble = self.epilogue = None
109 self.defects = []
110 # Default content type
111 self._default_type = 'text/plain'
112
113 def __str__(self):
114 """Return the entire formatted message as a string.
115 This includes the headers, body, and envelope header.
116 """
117 return self.as_string()
118
119 def as_string(self, unixfrom=False, maxheaderlen=0):
120 """Return the entire formatted message as a string.
121 Optional `unixfrom' when True, means include the Unix From_ envelope
122 header.
123
124 This is a convenience method and may not generate the message exactly
125 as you intend because by default it mangles lines that begin with
126 "From ". For more flexibility, use the flatten() method of a
127 Generator instance.
128 """
129 from email.generator import Generator
130 fp = StringIO()
131 g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
132 g.flatten(self, unixfrom=unixfrom)
133 return fp.getvalue()
134
135 def is_multipart(self):
136 """Return True if the message consists of multiple parts."""
137 return isinstance(self._payload, list)
138
139 #
140 # Unix From_ line
141 #
142 def set_unixfrom(self, unixfrom):
143 self._unixfrom = unixfrom
144
145 def get_unixfrom(self):
146 return self._unixfrom
147
148 #
149 # Payload manipulation.
150 #
151 def attach(self, payload):
152 """Add the given payload to the current payload.
153
154 The current payload will always be a list of objects after this method
155 is called. If you want to set the payload to a scalar object, use
156 set_payload() instead.
157 """
158 if self._payload is None:
159 self._payload = [payload]
160 else:
161 self._payload.append(payload)
162
163 def get_payload(self, i=None, decode=False):
164 """Return a reference to the payload.
165
166 The payload will either be a list object or a string. If you mutate
167 the list object, you modify the message's payload in place. Optional
168 i returns that index into the payload.
169
170 Optional decode is a flag indicating whether the payload should be
171 decoded or not, according to the Content-Transfer-Encoding header
172 (default is False).
173
174 When True and the message is not a multipart, the payload will be
175 decoded if this header's value is `quoted-printable' or `base64'. If
176 some other encoding is used, or the header is missing, or if the
177 payload has bogus data (i.e. bogus base64 or uuencoded data), the
178 payload is returned as-is.
179
180 If the message is a multipart and the decode flag is True, then None
181 is returned.
182 """
183 if i is None:
184 payload = self._payload
185 elif not isinstance(self._payload, list):
186 raise TypeError('Expected list, got %s' % type(self._payload))
187 else:
188 payload = self._payload[i]
189 if not decode:
190 return payload
191 # Decoded payloads always return bytes. XXX split this part out into
192 # a new method called .get_decoded_payload().
193 if self.is_multipart():
194 return None
195 cte = self.get('content-transfer-encoding', '').lower()
196 if cte == 'quoted-printable':
197 return utils._qdecode(payload)
198 elif cte == 'base64':
199 try:
Barry Warsaw8b2af272007-08-31 03:04:26 +0000200 if isinstance(payload, str):
201 payload = payload.encode('raw-unicode-escape')
202 return base64.b64decode(payload)
203 #return utils._bdecode(payload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000204 except binascii.Error:
205 # Incorrect padding
206 pass
207 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000208 in_file = BytesIO(payload.encode('raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000209 out_file = BytesIO()
210 try:
211 uu.decode(in_file, out_file, quiet=True)
212 return out_file.getvalue()
213 except uu.Error:
214 # Some decoding problem
215 pass
216 # Is there a better way to do this? We can't use the bytes
217 # constructor.
Barry Warsaw8b2af272007-08-31 03:04:26 +0000218 if isinstance(payload, str):
219 return payload.encode('raw-unicode-escape')
220 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000221
222 def set_payload(self, payload, charset=None):
223 """Set the payload to the given value.
224
225 Optional charset sets the message's default character set. See
226 set_charset() for details.
227 """
228 self._payload = payload
229 if charset is not None:
230 self.set_charset(charset)
231
232 def set_charset(self, charset):
233 """Set the charset of the payload to a given character set.
234
235 charset can be a Charset instance, a string naming a character set, or
236 None. If it is a string it will be converted to a Charset instance.
237 If charset is None, the charset parameter will be removed from the
238 Content-Type field. Anything else will generate a TypeError.
239
240 The message will be assumed to be of type text/* encoded with
241 charset.input_charset. It will be converted to charset.output_charset
242 and encoded properly, if needed, when generating the plain text
243 representation of the message. MIME headers (MIME-Version,
244 Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000245 """
246 if charset is None:
247 self.del_param('charset')
248 self._charset = None
249 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000250 if not isinstance(charset, Charset):
251 charset = Charset(charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000252 self._charset = charset
253 if 'MIME-Version' not in self:
254 self.add_header('MIME-Version', '1.0')
255 if 'Content-Type' not in self:
256 self.add_header('Content-Type', 'text/plain',
257 charset=charset.get_output_charset())
258 else:
259 self.set_param('charset', charset.get_output_charset())
Guido van Rossum9604e662007-08-30 03:46:43 +0000260 if charset != charset.get_output_charset():
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000261 self._payload = charset.body_encode(self._payload)
262 if 'Content-Transfer-Encoding' not in self:
263 cte = charset.get_body_encoding()
264 try:
265 cte(self)
266 except TypeError:
267 self._payload = charset.body_encode(self._payload)
268 self.add_header('Content-Transfer-Encoding', cte)
269
270 def get_charset(self):
271 """Return the Charset instance associated with the message's payload.
272 """
273 return self._charset
274
275 #
276 # MAPPING INTERFACE (partial)
277 #
278 def __len__(self):
279 """Return the total number of headers, including duplicates."""
280 return len(self._headers)
281
282 def __getitem__(self, name):
283 """Get a header value.
284
285 Return None if the header is missing instead of raising an exception.
286
287 Note that if the header appeared multiple times, exactly which
288 occurrance gets returned is undefined. Use get_all() to get all
289 the values matching a header field name.
290 """
291 return self.get(name)
292
293 def __setitem__(self, name, val):
294 """Set the value of a header.
295
296 Note: this does not overwrite an existing header with the same field
297 name. Use __delitem__() first to delete any existing headers.
298 """
299 self._headers.append((name, val))
300
301 def __delitem__(self, name):
302 """Delete all occurrences of a header, if present.
303
304 Does not raise an exception if the header is missing.
305 """
306 name = name.lower()
307 newheaders = []
308 for k, v in self._headers:
309 if k.lower() != name:
310 newheaders.append((k, v))
311 self._headers = newheaders
312
313 def __contains__(self, name):
314 return name.lower() in [k.lower() for k, v in self._headers]
315
316 def __iter__(self):
317 for field, value in self._headers:
318 yield field
319
320 def __len__(self):
321 return len(self._headers)
322
323 def keys(self):
324 """Return a list of all the message's header field names.
325
326 These will be sorted in the order they appeared in the original
327 message, or were added to the message, and may contain duplicates.
328 Any fields deleted and re-inserted are always appended to the header
329 list.
330 """
331 return [k for k, v in self._headers]
332
333 def values(self):
334 """Return a list of all the message's header values.
335
336 These will be sorted in the order they appeared in the original
337 message, or were added to the message, and may contain duplicates.
338 Any fields deleted and re-inserted are always appended to the header
339 list.
340 """
341 return [v for k, v in self._headers]
342
343 def items(self):
344 """Get all the message's header fields and values.
345
346 These will be sorted in the order they appeared in the original
347 message, or were added to the message, and may contain duplicates.
348 Any fields deleted and re-inserted are always appended to the header
349 list.
350 """
351 return self._headers[:]
352
353 def get(self, name, failobj=None):
354 """Get a header value.
355
356 Like __getitem__() but return failobj instead of None when the field
357 is missing.
358 """
359 name = name.lower()
360 for k, v in self._headers:
361 if k.lower() == name:
362 return v
363 return failobj
364
365 #
366 # Additional useful stuff
367 #
368
369 def get_all(self, name, failobj=None):
370 """Return a list of all the values for the named field.
371
372 These will be sorted in the order they appeared in the original
373 message, and may contain duplicates. Any fields deleted and
374 re-inserted are always appended to the header list.
375
376 If no such fields exist, failobj is returned (defaults to None).
377 """
378 values = []
379 name = name.lower()
380 for k, v in self._headers:
381 if k.lower() == name:
382 values.append(v)
383 if not values:
384 return failobj
385 return values
386
387 def add_header(self, _name, _value, **_params):
388 """Extended header setting.
389
390 name is the header field to add. keyword arguments can be used to set
391 additional parameters for the header field, with underscores converted
392 to dashes. Normally the parameter will be added as key="value" unless
393 value is None, in which case only the key will be added.
394
395 Example:
396
397 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
398 """
399 parts = []
400 for k, v in _params.items():
401 if v is None:
402 parts.append(k.replace('_', '-'))
403 else:
404 parts.append(_formatparam(k.replace('_', '-'), v))
405 if _value is not None:
406 parts.insert(0, _value)
407 self._headers.append((_name, SEMISPACE.join(parts)))
408
409 def replace_header(self, _name, _value):
410 """Replace a header.
411
412 Replace the first matching header found in the message, retaining
413 header order and case. If no matching header was found, a KeyError is
414 raised.
415 """
416 _name = _name.lower()
417 for i, (k, v) in zip(range(len(self._headers)), self._headers):
418 if k.lower() == _name:
419 self._headers[i] = (k, _value)
420 break
421 else:
422 raise KeyError(_name)
423
424 #
425 # Use these three methods instead of the three above.
426 #
427
428 def get_content_type(self):
429 """Return the message's content type.
430
431 The returned string is coerced to lower case of the form
432 `maintype/subtype'. If there was no Content-Type header in the
433 message, the default type as given by get_default_type() will be
434 returned. Since according to RFC 2045, messages always have a default
435 type this will always return a value.
436
437 RFC 2045 defines a message's default type to be text/plain unless it
438 appears inside a multipart/digest container, in which case it would be
439 message/rfc822.
440 """
441 missing = object()
442 value = self.get('content-type', missing)
443 if value is missing:
444 # This should have no parameters
445 return self.get_default_type()
446 ctype = paramre.split(value)[0].lower().strip()
447 # RFC 2045, section 5.2 says if its invalid, use text/plain
448 if ctype.count('/') != 1:
449 return 'text/plain'
450 return ctype
451
452 def get_content_maintype(self):
453 """Return the message's main content type.
454
455 This is the `maintype' part of the string returned by
456 get_content_type().
457 """
458 ctype = self.get_content_type()
459 return ctype.split('/')[0]
460
461 def get_content_subtype(self):
462 """Returns the message's sub-content type.
463
464 This is the `subtype' part of the string returned by
465 get_content_type().
466 """
467 ctype = self.get_content_type()
468 return ctype.split('/')[1]
469
470 def get_default_type(self):
471 """Return the `default' content type.
472
473 Most messages have a default content type of text/plain, except for
474 messages that are subparts of multipart/digest containers. Such
475 subparts have a default content type of message/rfc822.
476 """
477 return self._default_type
478
479 def set_default_type(self, ctype):
480 """Set the `default' content type.
481
482 ctype should be either "text/plain" or "message/rfc822", although this
483 is not enforced. The default content type is not stored in the
484 Content-Type header.
485 """
486 self._default_type = ctype
487
488 def _get_params_preserve(self, failobj, header):
489 # Like get_params() but preserves the quoting of values. BAW:
490 # should this be part of the public interface?
491 missing = object()
492 value = self.get(header, missing)
493 if value is missing:
494 return failobj
495 params = []
496 for p in _parseparam(';' + value):
497 try:
498 name, val = p.split('=', 1)
499 name = name.strip()
500 val = val.strip()
501 except ValueError:
502 # Must have been a bare attribute
503 name = p.strip()
504 val = ''
505 params.append((name, val))
506 params = utils.decode_params(params)
507 return params
508
509 def get_params(self, failobj=None, header='content-type', unquote=True):
510 """Return the message's Content-Type parameters, as a list.
511
512 The elements of the returned list are 2-tuples of key/value pairs, as
513 split on the `=' sign. The left hand side of the `=' is the key,
514 while the right hand side is the value. If there is no `=' sign in
515 the parameter the value is the empty string. The value is as
516 described in the get_param() method.
517
518 Optional failobj is the object to return if there is no Content-Type
519 header. Optional header is the header to search instead of
520 Content-Type. If unquote is True, the value is unquoted.
521 """
522 missing = object()
523 params = self._get_params_preserve(missing, header)
524 if params is missing:
525 return failobj
526 if unquote:
527 return [(k, _unquotevalue(v)) for k, v in params]
528 else:
529 return params
530
531 def get_param(self, param, failobj=None, header='content-type',
532 unquote=True):
533 """Return the parameter value if found in the Content-Type header.
534
535 Optional failobj is the object to return if there is no Content-Type
536 header, or the Content-Type header has no such parameter. Optional
537 header is the header to search instead of Content-Type.
538
539 Parameter keys are always compared case insensitively. The return
540 value can either be a string, or a 3-tuple if the parameter was RFC
541 2231 encoded. When it's a 3-tuple, the elements of the value are of
542 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
543 LANGUAGE can be None, in which case you should consider VALUE to be
544 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
545
546 Your application should be prepared to deal with 3-tuple return
547 values, and can convert the parameter to a Unicode string like so:
548
549 param = msg.get_param('foo')
550 if isinstance(param, tuple):
551 param = unicode(param[2], param[0] or 'us-ascii')
552
553 In any case, the parameter value (either the returned string, or the
554 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
555 to False.
556 """
557 if header not in self:
558 return failobj
559 for k, v in self._get_params_preserve(failobj, header):
560 if k.lower() == param.lower():
561 if unquote:
562 return _unquotevalue(v)
563 else:
564 return v
565 return failobj
566
567 def set_param(self, param, value, header='Content-Type', requote=True,
568 charset=None, language=''):
569 """Set a parameter in the Content-Type header.
570
571 If the parameter already exists in the header, its value will be
572 replaced with the new value.
573
574 If header is Content-Type and has not yet been defined for this
575 message, it will be set to "text/plain" and the new parameter and
576 value will be appended as per RFC 2045.
577
578 An alternate header can specified in the header argument, and all
579 parameters will be quoted as necessary unless requote is False.
580
581 If charset is specified, the parameter will be encoded according to RFC
582 2231. Optional language specifies the RFC 2231 language, defaulting
583 to the empty string. Both charset and language should be strings.
584 """
585 if not isinstance(value, tuple) and charset:
586 value = (charset, language, value)
587
588 if header not in self and header.lower() == 'content-type':
589 ctype = 'text/plain'
590 else:
591 ctype = self.get(header)
592 if not self.get_param(param, header=header):
593 if not ctype:
594 ctype = _formatparam(param, value, requote)
595 else:
596 ctype = SEMISPACE.join(
597 [ctype, _formatparam(param, value, requote)])
598 else:
599 ctype = ''
600 for old_param, old_value in self.get_params(header=header,
601 unquote=requote):
602 append_param = ''
603 if old_param.lower() == param.lower():
604 append_param = _formatparam(param, value, requote)
605 else:
606 append_param = _formatparam(old_param, old_value, requote)
607 if not ctype:
608 ctype = append_param
609 else:
610 ctype = SEMISPACE.join([ctype, append_param])
611 if ctype != self.get(header):
612 del self[header]
613 self[header] = ctype
614
615 def del_param(self, param, header='content-type', requote=True):
616 """Remove the given parameter completely from the Content-Type header.
617
618 The header will be re-written in place without the parameter or its
619 value. All values will be quoted as necessary unless requote is
620 False. Optional header specifies an alternative to the Content-Type
621 header.
622 """
623 if header not in self:
624 return
625 new_ctype = ''
626 for p, v in self.get_params(header=header, unquote=requote):
627 if p.lower() != param.lower():
628 if not new_ctype:
629 new_ctype = _formatparam(p, v, requote)
630 else:
631 new_ctype = SEMISPACE.join([new_ctype,
632 _formatparam(p, v, requote)])
633 if new_ctype != self.get(header):
634 del self[header]
635 self[header] = new_ctype
636
637 def set_type(self, type, header='Content-Type', requote=True):
638 """Set the main type and subtype for the Content-Type header.
639
640 type must be a string in the form "maintype/subtype", otherwise a
641 ValueError is raised.
642
643 This method replaces the Content-Type header, keeping all the
644 parameters in place. If requote is False, this leaves the existing
645 header's quoting as is. Otherwise, the parameters will be quoted (the
646 default).
647
648 An alternative header can be specified in the header argument. When
649 the Content-Type header is set, we'll always also add a MIME-Version
650 header.
651 """
652 # BAW: should we be strict?
653 if not type.count('/') == 1:
654 raise ValueError
655 # Set the Content-Type, you get a MIME-Version
656 if header.lower() == 'content-type':
657 del self['mime-version']
658 self['MIME-Version'] = '1.0'
659 if header not in self:
660 self[header] = type
661 return
662 params = self.get_params(header=header, unquote=requote)
663 del self[header]
664 self[header] = type
665 # Skip the first param; it's the old type.
666 for p, v in params[1:]:
667 self.set_param(p, v, header, requote)
668
669 def get_filename(self, failobj=None):
670 """Return the filename associated with the payload if present.
671
672 The filename is extracted from the Content-Disposition header's
673 `filename' parameter, and it is unquoted. If that header is missing
674 the `filename' parameter, this method falls back to looking for the
675 `name' parameter.
676 """
677 missing = object()
678 filename = self.get_param('filename', missing, 'content-disposition')
679 if filename is missing:
680 filename = self.get_param('name', missing, 'content-disposition')
681 if filename is missing:
682 return failobj
683 return utils.collapse_rfc2231_value(filename).strip()
684
685 def get_boundary(self, failobj=None):
686 """Return the boundary associated with the payload if present.
687
688 The boundary is extracted from the Content-Type header's `boundary'
689 parameter, and it is unquoted.
690 """
691 missing = object()
692 boundary = self.get_param('boundary', missing)
693 if boundary is missing:
694 return failobj
695 # RFC 2046 says that boundaries may begin but not end in w/s
696 return utils.collapse_rfc2231_value(boundary).rstrip()
697
698 def set_boundary(self, boundary):
699 """Set the boundary parameter in Content-Type to 'boundary'.
700
701 This is subtly different than deleting the Content-Type header and
702 adding a new one with a new boundary parameter via add_header(). The
703 main difference is that using the set_boundary() method preserves the
704 order of the Content-Type header in the original message.
705
706 HeaderParseError is raised if the message has no Content-Type header.
707 """
708 missing = object()
709 params = self._get_params_preserve(missing, 'content-type')
710 if params is missing:
711 # There was no Content-Type header, and we don't know what type
712 # to set it to, so raise an exception.
713 raise errors.HeaderParseError('No Content-Type header found')
714 newparams = []
715 foundp = False
716 for pk, pv in params:
717 if pk.lower() == 'boundary':
718 newparams.append(('boundary', '"%s"' % boundary))
719 foundp = True
720 else:
721 newparams.append((pk, pv))
722 if not foundp:
723 # The original Content-Type header had no boundary attribute.
724 # Tack one on the end. BAW: should we raise an exception
725 # instead???
726 newparams.append(('boundary', '"%s"' % boundary))
727 # Replace the existing Content-Type header with the new value
728 newheaders = []
729 for h, v in self._headers:
730 if h.lower() == 'content-type':
731 parts = []
732 for k, v in newparams:
733 if v == '':
734 parts.append(k)
735 else:
736 parts.append('%s=%s' % (k, v))
737 newheaders.append((h, SEMISPACE.join(parts)))
738
739 else:
740 newheaders.append((h, v))
741 self._headers = newheaders
742
743 def get_content_charset(self, failobj=None):
744 """Return the charset parameter of the Content-Type header.
745
746 The returned string is always coerced to lower case. If there is no
747 Content-Type header, or if that header has no charset parameter,
748 failobj is returned.
749 """
750 missing = object()
751 charset = self.get_param('charset', missing)
752 if charset is missing:
753 return failobj
754 if isinstance(charset, tuple):
755 # RFC 2231 encoded, so decode it, and it better end up as ascii.
756 pcharset = charset[0] or 'us-ascii'
757 try:
758 # LookupError will be raised if the charset isn't known to
759 # Python. UnicodeError will be raised if the encoded text
760 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000761 as_bytes = charset[2].encode('raw-unicode-escape')
762 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000763 except (LookupError, UnicodeError):
764 charset = charset[2]
765 # charset characters must be in us-ascii range
766 try:
767 charset.encode('us-ascii')
768 except UnicodeError:
769 return failobj
770 # RFC 2046, $4.1.2 says charsets are not case sensitive
771 return charset.lower()
772
773 def get_charsets(self, failobj=None):
774 """Return a list containing the charset(s) used in this message.
775
776 The returned list of items describes the Content-Type headers'
777 charset parameter for this message and all the subparts in its
778 payload.
779
780 Each item will either be a string (the value of the charset parameter
781 in the Content-Type header of that part) or the value of the
782 'failobj' parameter (defaults to None), if the part does not have a
783 main MIME type of "text", or the charset is not defined.
784
785 The list will contain one string for each part of the message, plus
786 one for the container message (i.e. self), so that a non-multipart
787 message will still return a list of length 1.
788 """
789 return [part.get_content_charset(failobj) for part in self.walk()]
790
791 # I.e. def walk(self): ...
792 from email.iterators import walk