blob: 412a550d27509756de6bd3f56989c5ebf66fa8a2 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
Guido van Rossum9604e662007-08-30 03:46:43 +000019from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
# Separator placed between a header's main value and each of its parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
27
Guido van Rossum8b3febe2007-08-30 01:15:14 +000028# Helper functions
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns a 2-tuple ``(head, rest)`` with surrounding whitespace removed
    from both items.  ``rest`` is None when the value contains no ';'.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full fledged parser.
    """
    head, found, rest = param.partition(';')
    return head.strip(), (rest.strip() if found else None)
38
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    When value is None or empty, only the bare key is returned.  A tuple
    value is treated as an RFC 2231 triple (charset, language, value), where
    charset is a string and not a Charset instance; it is encoded with
    utils.encode_rfc2231() and the key gets a trailing '*'.

    The value is quoted if quote is true or if it contains any of the
    `tspecials' characters.
    """
    # Nothing to format: emit the key on its own.
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231
        charset, language, text = value[0], value[1], value[2]
        param += '*'
        value = utils.encode_rfc2231(text, charset, language)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    must_quote = quote or tspecials.search(value)
    if must_quote:
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
60
def _parseparam(s):
    """Split a parameter string into a list of `key=value' strings.

    s must start with ';' for anything to be parsed; every ';'-introduced
    segment yields one list item.  For items containing '=', the key is
    lower-cased and whitespace around both key and value is stripped.  Items
    without '=' (bare attributes) are stripped and returned as-is.

    A ';' inside a double-quoted value does not end the parameter.  Quotes
    escaped with a backslash (\\") do not open or close a quoted value.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # An odd number of unescaped quotes before `end' means this ';' sits
        # inside a quoted string, so keep scanning for the next one.
        # Escaped quotes are subtracted because they are part of the value,
        # not delimiters.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist
77
78
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 triples as triples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # (charset, language, value): only the value item carries quoting.
    return value[0], value[1], utils.unquote(value[2])
88
89
90
91class Message:
92 """Basic message object.
93
94 A message object is defined as something that has a bunch of RFC 2822
95 headers and a payload. It may optionally have an envelope header
96 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
97 multipart or a message/rfc822), then the payload is a list of Message
98 objects, otherwise it is a string.
99
100 Message objects implement part of the `mapping' interface, which assumes
101 there is exactly one occurrance of the header per message. Some headers
102 do in fact appear multiple times (e.g. Received) and for those headers,
103 you must use the explicit API to set or get all the headers. Not all of
104 the mapping methods are implemented.
105 """
106 def __init__(self):
107 self._headers = []
108 self._unixfrom = None
109 self._payload = None
110 self._charset = None
111 # Defaults for multipart messages
112 self.preamble = self.epilogue = None
113 self.defects = []
114 # Default content type
115 self._default_type = 'text/plain'
116
117 def __str__(self):
118 """Return the entire formatted message as a string.
119 This includes the headers, body, and envelope header.
120 """
121 return self.as_string()
122
123 def as_string(self, unixfrom=False, maxheaderlen=0):
124 """Return the entire formatted message as a string.
125 Optional `unixfrom' when True, means include the Unix From_ envelope
126 header.
127
128 This is a convenience method and may not generate the message exactly
129 as you intend because by default it mangles lines that begin with
130 "From ". For more flexibility, use the flatten() method of a
131 Generator instance.
132 """
133 from email.generator import Generator
134 fp = StringIO()
135 g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
136 g.flatten(self, unixfrom=unixfrom)
137 return fp.getvalue()
138
139 def is_multipart(self):
140 """Return True if the message consists of multiple parts."""
141 return isinstance(self._payload, list)
142
143 #
144 # Unix From_ line
145 #
146 def set_unixfrom(self, unixfrom):
147 self._unixfrom = unixfrom
148
149 def get_unixfrom(self):
150 return self._unixfrom
151
152 #
153 # Payload manipulation.
154 #
155 def attach(self, payload):
156 """Add the given payload to the current payload.
157
158 The current payload will always be a list of objects after this method
159 is called. If you want to set the payload to a scalar object, use
160 set_payload() instead.
161 """
162 if self._payload is None:
163 self._payload = [payload]
164 else:
165 self._payload.append(payload)
166
167 def get_payload(self, i=None, decode=False):
168 """Return a reference to the payload.
169
170 The payload will either be a list object or a string. If you mutate
171 the list object, you modify the message's payload in place. Optional
172 i returns that index into the payload.
173
174 Optional decode is a flag indicating whether the payload should be
175 decoded or not, according to the Content-Transfer-Encoding header
176 (default is False).
177
178 When True and the message is not a multipart, the payload will be
179 decoded if this header's value is `quoted-printable' or `base64'. If
180 some other encoding is used, or the header is missing, or if the
181 payload has bogus data (i.e. bogus base64 or uuencoded data), the
182 payload is returned as-is.
183
184 If the message is a multipart and the decode flag is True, then None
185 is returned.
186 """
187 if i is None:
188 payload = self._payload
189 elif not isinstance(self._payload, list):
190 raise TypeError('Expected list, got %s' % type(self._payload))
191 else:
192 payload = self._payload[i]
193 if not decode:
194 return payload
195 # Decoded payloads always return bytes. XXX split this part out into
196 # a new method called .get_decoded_payload().
197 if self.is_multipart():
198 return None
199 cte = self.get('content-transfer-encoding', '').lower()
200 if cte == 'quoted-printable':
201 return utils._qdecode(payload)
202 elif cte == 'base64':
203 try:
Barry Warsaw8b2af272007-08-31 03:04:26 +0000204 if isinstance(payload, str):
205 payload = payload.encode('raw-unicode-escape')
206 return base64.b64decode(payload)
207 #return utils._bdecode(payload)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000208 except binascii.Error:
209 # Incorrect padding
210 pass
211 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000212 in_file = BytesIO(payload.encode('raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000213 out_file = BytesIO()
214 try:
215 uu.decode(in_file, out_file, quiet=True)
216 return out_file.getvalue()
217 except uu.Error:
218 # Some decoding problem
219 pass
220 # Is there a better way to do this? We can't use the bytes
221 # constructor.
Barry Warsaw8b2af272007-08-31 03:04:26 +0000222 if isinstance(payload, str):
223 return payload.encode('raw-unicode-escape')
224 return payload
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000225
226 def set_payload(self, payload, charset=None):
227 """Set the payload to the given value.
228
229 Optional charset sets the message's default character set. See
230 set_charset() for details.
231 """
232 self._payload = payload
233 if charset is not None:
234 self.set_charset(charset)
235
236 def set_charset(self, charset):
237 """Set the charset of the payload to a given character set.
238
239 charset can be a Charset instance, a string naming a character set, or
240 None. If it is a string it will be converted to a Charset instance.
241 If charset is None, the charset parameter will be removed from the
242 Content-Type field. Anything else will generate a TypeError.
243
244 The message will be assumed to be of type text/* encoded with
245 charset.input_charset. It will be converted to charset.output_charset
246 and encoded properly, if needed, when generating the plain text
247 representation of the message. MIME headers (MIME-Version,
248 Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000249 """
250 if charset is None:
251 self.del_param('charset')
252 self._charset = None
253 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000254 if not isinstance(charset, Charset):
255 charset = Charset(charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000256 self._charset = charset
257 if 'MIME-Version' not in self:
258 self.add_header('MIME-Version', '1.0')
259 if 'Content-Type' not in self:
260 self.add_header('Content-Type', 'text/plain',
261 charset=charset.get_output_charset())
262 else:
263 self.set_param('charset', charset.get_output_charset())
Guido van Rossum9604e662007-08-30 03:46:43 +0000264 if charset != charset.get_output_charset():
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000265 self._payload = charset.body_encode(self._payload)
266 if 'Content-Transfer-Encoding' not in self:
267 cte = charset.get_body_encoding()
268 try:
269 cte(self)
270 except TypeError:
271 self._payload = charset.body_encode(self._payload)
272 self.add_header('Content-Transfer-Encoding', cte)
273
274 def get_charset(self):
275 """Return the Charset instance associated with the message's payload.
276 """
277 return self._charset
278
279 #
280 # MAPPING INTERFACE (partial)
281 #
282 def __len__(self):
283 """Return the total number of headers, including duplicates."""
284 return len(self._headers)
285
286 def __getitem__(self, name):
287 """Get a header value.
288
289 Return None if the header is missing instead of raising an exception.
290
291 Note that if the header appeared multiple times, exactly which
292 occurrance gets returned is undefined. Use get_all() to get all
293 the values matching a header field name.
294 """
295 return self.get(name)
296
297 def __setitem__(self, name, val):
298 """Set the value of a header.
299
300 Note: this does not overwrite an existing header with the same field
301 name. Use __delitem__() first to delete any existing headers.
302 """
303 self._headers.append((name, val))
304
305 def __delitem__(self, name):
306 """Delete all occurrences of a header, if present.
307
308 Does not raise an exception if the header is missing.
309 """
310 name = name.lower()
311 newheaders = []
312 for k, v in self._headers:
313 if k.lower() != name:
314 newheaders.append((k, v))
315 self._headers = newheaders
316
317 def __contains__(self, name):
318 return name.lower() in [k.lower() for k, v in self._headers]
319
320 def __iter__(self):
321 for field, value in self._headers:
322 yield field
323
324 def __len__(self):
325 return len(self._headers)
326
327 def keys(self):
328 """Return a list of all the message's header field names.
329
330 These will be sorted in the order they appeared in the original
331 message, or were added to the message, and may contain duplicates.
332 Any fields deleted and re-inserted are always appended to the header
333 list.
334 """
335 return [k for k, v in self._headers]
336
337 def values(self):
338 """Return a list of all the message's header values.
339
340 These will be sorted in the order they appeared in the original
341 message, or were added to the message, and may contain duplicates.
342 Any fields deleted and re-inserted are always appended to the header
343 list.
344 """
345 return [v for k, v in self._headers]
346
347 def items(self):
348 """Get all the message's header fields and values.
349
350 These will be sorted in the order they appeared in the original
351 message, or were added to the message, and may contain duplicates.
352 Any fields deleted and re-inserted are always appended to the header
353 list.
354 """
355 return self._headers[:]
356
357 def get(self, name, failobj=None):
358 """Get a header value.
359
360 Like __getitem__() but return failobj instead of None when the field
361 is missing.
362 """
363 name = name.lower()
364 for k, v in self._headers:
365 if k.lower() == name:
366 return v
367 return failobj
368
369 #
370 # Additional useful stuff
371 #
372
373 def get_all(self, name, failobj=None):
374 """Return a list of all the values for the named field.
375
376 These will be sorted in the order they appeared in the original
377 message, and may contain duplicates. Any fields deleted and
378 re-inserted are always appended to the header list.
379
380 If no such fields exist, failobj is returned (defaults to None).
381 """
382 values = []
383 name = name.lower()
384 for k, v in self._headers:
385 if k.lower() == name:
386 values.append(v)
387 if not values:
388 return failobj
389 return values
390
391 def add_header(self, _name, _value, **_params):
392 """Extended header setting.
393
394 name is the header field to add. keyword arguments can be used to set
395 additional parameters for the header field, with underscores converted
396 to dashes. Normally the parameter will be added as key="value" unless
397 value is None, in which case only the key will be added.
398
399 Example:
400
401 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
402 """
403 parts = []
404 for k, v in _params.items():
405 if v is None:
406 parts.append(k.replace('_', '-'))
407 else:
408 parts.append(_formatparam(k.replace('_', '-'), v))
409 if _value is not None:
410 parts.insert(0, _value)
411 self._headers.append((_name, SEMISPACE.join(parts)))
412
413 def replace_header(self, _name, _value):
414 """Replace a header.
415
416 Replace the first matching header found in the message, retaining
417 header order and case. If no matching header was found, a KeyError is
418 raised.
419 """
420 _name = _name.lower()
421 for i, (k, v) in zip(range(len(self._headers)), self._headers):
422 if k.lower() == _name:
423 self._headers[i] = (k, _value)
424 break
425 else:
426 raise KeyError(_name)
427
428 #
429 # Use these three methods instead of the three above.
430 #
431
432 def get_content_type(self):
433 """Return the message's content type.
434
435 The returned string is coerced to lower case of the form
436 `maintype/subtype'. If there was no Content-Type header in the
437 message, the default type as given by get_default_type() will be
438 returned. Since according to RFC 2045, messages always have a default
439 type this will always return a value.
440
441 RFC 2045 defines a message's default type to be text/plain unless it
442 appears inside a multipart/digest container, in which case it would be
443 message/rfc822.
444 """
445 missing = object()
446 value = self.get('content-type', missing)
447 if value is missing:
448 # This should have no parameters
449 return self.get_default_type()
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000450 ctype = _splitparam(value)[0].lower()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000451 # RFC 2045, section 5.2 says if its invalid, use text/plain
452 if ctype.count('/') != 1:
453 return 'text/plain'
454 return ctype
455
456 def get_content_maintype(self):
457 """Return the message's main content type.
458
459 This is the `maintype' part of the string returned by
460 get_content_type().
461 """
462 ctype = self.get_content_type()
463 return ctype.split('/')[0]
464
465 def get_content_subtype(self):
466 """Returns the message's sub-content type.
467
468 This is the `subtype' part of the string returned by
469 get_content_type().
470 """
471 ctype = self.get_content_type()
472 return ctype.split('/')[1]
473
474 def get_default_type(self):
475 """Return the `default' content type.
476
477 Most messages have a default content type of text/plain, except for
478 messages that are subparts of multipart/digest containers. Such
479 subparts have a default content type of message/rfc822.
480 """
481 return self._default_type
482
483 def set_default_type(self, ctype):
484 """Set the `default' content type.
485
486 ctype should be either "text/plain" or "message/rfc822", although this
487 is not enforced. The default content type is not stored in the
488 Content-Type header.
489 """
490 self._default_type = ctype
491
492 def _get_params_preserve(self, failobj, header):
493 # Like get_params() but preserves the quoting of values. BAW:
494 # should this be part of the public interface?
495 missing = object()
496 value = self.get(header, missing)
497 if value is missing:
498 return failobj
499 params = []
500 for p in _parseparam(';' + value):
501 try:
502 name, val = p.split('=', 1)
503 name = name.strip()
504 val = val.strip()
505 except ValueError:
506 # Must have been a bare attribute
507 name = p.strip()
508 val = ''
509 params.append((name, val))
510 params = utils.decode_params(params)
511 return params
512
513 def get_params(self, failobj=None, header='content-type', unquote=True):
514 """Return the message's Content-Type parameters, as a list.
515
516 The elements of the returned list are 2-tuples of key/value pairs, as
517 split on the `=' sign. The left hand side of the `=' is the key,
518 while the right hand side is the value. If there is no `=' sign in
519 the parameter the value is the empty string. The value is as
520 described in the get_param() method.
521
522 Optional failobj is the object to return if there is no Content-Type
523 header. Optional header is the header to search instead of
524 Content-Type. If unquote is True, the value is unquoted.
525 """
526 missing = object()
527 params = self._get_params_preserve(missing, header)
528 if params is missing:
529 return failobj
530 if unquote:
531 return [(k, _unquotevalue(v)) for k, v in params]
532 else:
533 return params
534
535 def get_param(self, param, failobj=None, header='content-type',
536 unquote=True):
537 """Return the parameter value if found in the Content-Type header.
538
539 Optional failobj is the object to return if there is no Content-Type
540 header, or the Content-Type header has no such parameter. Optional
541 header is the header to search instead of Content-Type.
542
543 Parameter keys are always compared case insensitively. The return
544 value can either be a string, or a 3-tuple if the parameter was RFC
545 2231 encoded. When it's a 3-tuple, the elements of the value are of
546 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
547 LANGUAGE can be None, in which case you should consider VALUE to be
548 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
549
550 Your application should be prepared to deal with 3-tuple return
551 values, and can convert the parameter to a Unicode string like so:
552
553 param = msg.get_param('foo')
554 if isinstance(param, tuple):
555 param = unicode(param[2], param[0] or 'us-ascii')
556
557 In any case, the parameter value (either the returned string, or the
558 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
559 to False.
560 """
561 if header not in self:
562 return failobj
563 for k, v in self._get_params_preserve(failobj, header):
564 if k.lower() == param.lower():
565 if unquote:
566 return _unquotevalue(v)
567 else:
568 return v
569 return failobj
570
571 def set_param(self, param, value, header='Content-Type', requote=True,
572 charset=None, language=''):
573 """Set a parameter in the Content-Type header.
574
575 If the parameter already exists in the header, its value will be
576 replaced with the new value.
577
578 If header is Content-Type and has not yet been defined for this
579 message, it will be set to "text/plain" and the new parameter and
580 value will be appended as per RFC 2045.
581
582 An alternate header can specified in the header argument, and all
583 parameters will be quoted as necessary unless requote is False.
584
585 If charset is specified, the parameter will be encoded according to RFC
586 2231. Optional language specifies the RFC 2231 language, defaulting
587 to the empty string. Both charset and language should be strings.
588 """
589 if not isinstance(value, tuple) and charset:
590 value = (charset, language, value)
591
592 if header not in self and header.lower() == 'content-type':
593 ctype = 'text/plain'
594 else:
595 ctype = self.get(header)
596 if not self.get_param(param, header=header):
597 if not ctype:
598 ctype = _formatparam(param, value, requote)
599 else:
600 ctype = SEMISPACE.join(
601 [ctype, _formatparam(param, value, requote)])
602 else:
603 ctype = ''
604 for old_param, old_value in self.get_params(header=header,
605 unquote=requote):
606 append_param = ''
607 if old_param.lower() == param.lower():
608 append_param = _formatparam(param, value, requote)
609 else:
610 append_param = _formatparam(old_param, old_value, requote)
611 if not ctype:
612 ctype = append_param
613 else:
614 ctype = SEMISPACE.join([ctype, append_param])
615 if ctype != self.get(header):
616 del self[header]
617 self[header] = ctype
618
619 def del_param(self, param, header='content-type', requote=True):
620 """Remove the given parameter completely from the Content-Type header.
621
622 The header will be re-written in place without the parameter or its
623 value. All values will be quoted as necessary unless requote is
624 False. Optional header specifies an alternative to the Content-Type
625 header.
626 """
627 if header not in self:
628 return
629 new_ctype = ''
630 for p, v in self.get_params(header=header, unquote=requote):
631 if p.lower() != param.lower():
632 if not new_ctype:
633 new_ctype = _formatparam(p, v, requote)
634 else:
635 new_ctype = SEMISPACE.join([new_ctype,
636 _formatparam(p, v, requote)])
637 if new_ctype != self.get(header):
638 del self[header]
639 self[header] = new_ctype
640
641 def set_type(self, type, header='Content-Type', requote=True):
642 """Set the main type and subtype for the Content-Type header.
643
644 type must be a string in the form "maintype/subtype", otherwise a
645 ValueError is raised.
646
647 This method replaces the Content-Type header, keeping all the
648 parameters in place. If requote is False, this leaves the existing
649 header's quoting as is. Otherwise, the parameters will be quoted (the
650 default).
651
652 An alternative header can be specified in the header argument. When
653 the Content-Type header is set, we'll always also add a MIME-Version
654 header.
655 """
656 # BAW: should we be strict?
657 if not type.count('/') == 1:
658 raise ValueError
659 # Set the Content-Type, you get a MIME-Version
660 if header.lower() == 'content-type':
661 del self['mime-version']
662 self['MIME-Version'] = '1.0'
663 if header not in self:
664 self[header] = type
665 return
666 params = self.get_params(header=header, unquote=requote)
667 del self[header]
668 self[header] = type
669 # Skip the first param; it's the old type.
670 for p, v in params[1:]:
671 self.set_param(p, v, header, requote)
672
673 def get_filename(self, failobj=None):
674 """Return the filename associated with the payload if present.
675
676 The filename is extracted from the Content-Disposition header's
677 `filename' parameter, and it is unquoted. If that header is missing
678 the `filename' parameter, this method falls back to looking for the
679 `name' parameter.
680 """
681 missing = object()
682 filename = self.get_param('filename', missing, 'content-disposition')
683 if filename is missing:
684 filename = self.get_param('name', missing, 'content-disposition')
685 if filename is missing:
686 return failobj
687 return utils.collapse_rfc2231_value(filename).strip()
688
689 def get_boundary(self, failobj=None):
690 """Return the boundary associated with the payload if present.
691
692 The boundary is extracted from the Content-Type header's `boundary'
693 parameter, and it is unquoted.
694 """
695 missing = object()
696 boundary = self.get_param('boundary', missing)
697 if boundary is missing:
698 return failobj
699 # RFC 2046 says that boundaries may begin but not end in w/s
700 return utils.collapse_rfc2231_value(boundary).rstrip()
701
702 def set_boundary(self, boundary):
703 """Set the boundary parameter in Content-Type to 'boundary'.
704
705 This is subtly different than deleting the Content-Type header and
706 adding a new one with a new boundary parameter via add_header(). The
707 main difference is that using the set_boundary() method preserves the
708 order of the Content-Type header in the original message.
709
710 HeaderParseError is raised if the message has no Content-Type header.
711 """
712 missing = object()
713 params = self._get_params_preserve(missing, 'content-type')
714 if params is missing:
715 # There was no Content-Type header, and we don't know what type
716 # to set it to, so raise an exception.
717 raise errors.HeaderParseError('No Content-Type header found')
718 newparams = []
719 foundp = False
720 for pk, pv in params:
721 if pk.lower() == 'boundary':
722 newparams.append(('boundary', '"%s"' % boundary))
723 foundp = True
724 else:
725 newparams.append((pk, pv))
726 if not foundp:
727 # The original Content-Type header had no boundary attribute.
728 # Tack one on the end. BAW: should we raise an exception
729 # instead???
730 newparams.append(('boundary', '"%s"' % boundary))
731 # Replace the existing Content-Type header with the new value
732 newheaders = []
733 for h, v in self._headers:
734 if h.lower() == 'content-type':
735 parts = []
736 for k, v in newparams:
737 if v == '':
738 parts.append(k)
739 else:
740 parts.append('%s=%s' % (k, v))
741 newheaders.append((h, SEMISPACE.join(parts)))
742
743 else:
744 newheaders.append((h, v))
745 self._headers = newheaders
746
747 def get_content_charset(self, failobj=None):
748 """Return the charset parameter of the Content-Type header.
749
750 The returned string is always coerced to lower case. If there is no
751 Content-Type header, or if that header has no charset parameter,
752 failobj is returned.
753 """
754 missing = object()
755 charset = self.get_param('charset', missing)
756 if charset is missing:
757 return failobj
758 if isinstance(charset, tuple):
759 # RFC 2231 encoded, so decode it, and it better end up as ascii.
760 pcharset = charset[0] or 'us-ascii'
761 try:
762 # LookupError will be raised if the charset isn't known to
763 # Python. UnicodeError will be raised if the encoded text
764 # contains a character not in the charset.
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +0000765 as_bytes = charset[2].encode('raw-unicode-escape')
766 charset = str(as_bytes, pcharset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000767 except (LookupError, UnicodeError):
768 charset = charset[2]
769 # charset characters must be in us-ascii range
770 try:
771 charset.encode('us-ascii')
772 except UnicodeError:
773 return failobj
774 # RFC 2046, $4.1.2 says charsets are not case sensitive
775 return charset.lower()
776
777 def get_charsets(self, failobj=None):
778 """Return a list containing the charset(s) used in this message.
779
780 The returned list of items describes the Content-Type headers'
781 charset parameter for this message and all the subparts in its
782 payload.
783
784 Each item will either be a string (the value of the charset parameter
785 in the Content-Type header of that part) or the value of the
786 'failobj' parameter (defaults to None), if the part does not have a
787 main MIME type of "text", or the charset is not defined.
788
789 The list will contain one string for each part of the message, plus
790 one for the container message (i.e. self), so that a non-multipart
791 message will still return a list of length 1.
792 """
793 return [part.get_content_charset(failobj) for part in self.walk()]
794
795 # I.e. def walk(self): ...
796 from email.iterators import walk