blob: a6bf146d7c0b6658c1834f5c44d37e84feb9cbd8 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message']
8
9import re
10import uu
Barry Warsaw8b2af272007-08-31 03:04:26 +000011import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012import binascii
13import warnings
14from io import BytesIO, StringIO
15
16# Intrapackage imports
Guido van Rossum8b3febe2007-08-30 01:15:14 +000017from email import utils
18from email import errors
Guido van Rossum9604e662007-08-30 03:46:43 +000019from email.charset import Charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
# Separator placed between a header's main value and each of its parameters
# when a parameterized header (e.g. Content-Type) is assembled.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
26
27
Guido van Rossum8b3febe2007-08-30 01:15:14 +000028# Helper functions
def _splitparam(param):
    # Divide a header value at its first semicolon into (value, parameters).
    # Both pieces are stripped of surrounding whitespace; when there is no
    # semicolon at all the second item is None.
    #
    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    # compliant, but it catches most headers found in the wild.  A full
    # fledged parser may be needed eventually.
    head, found, tail = param.partition(';')
    rest = tail.strip() if found else None
    return head.strip(), rest
38
def _formatparam(param, value=None, quote=True):
    """Build the text of a single header parameter.

    With no value (None or empty) the bare parameter name is returned.
    Otherwise the result is `param=value', with the value wrapped in double
    quotes when quote is true or when it contains a tspecials character.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is an RFC 2231 parameter value of the form
    # (charset, language, value), where charset is a string and not a Charset
    # instance.  Such parameters get a trailing `*' on their name.
    if isinstance(value, tuple):
        param += '*'
        value = utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should force
    # quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
60
def _parseparam(s):
    # Break a string of the form ';item;item;...' into a list of items.
    # Semicolons that fall inside a double-quoted section do not split.  For
    # `key=value' items the key is lower-cased and whitespace around both the
    # key and the value is removed.
    params = []
    while s[:1] == ';':
        s = s[1:]
        cut = s.find(';')
        # An odd number of unescaped double quotes before the semicolon means
        # it sits inside a quoted string, so look for the next one.
        while cut > 0 and (s.count('"', 0, cut) -
                           s.count('\\"', 0, cut)) % 2 != 0:
            cut = s.find(';', cut + 1)
        if cut < 0:
            cut = len(s)
        field, s = s[:cut], s[cut:]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        params.append(field.strip())
    return params
77
78
def _unquotevalue(value):
    # Strip quoting from a parameter value.  Unlike
    # utils.collapse_rfc2231_value() this does not try to turn the value into
    # a unicode string: Message.get_param() and Message.get_params() are both
    # currently defined to hand back the tuple when faced with RFC 2231
    # parameters, so for a tuple only its third item is unquoted.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
88
89
90
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        # Headers are kept as an ordered list of (name, value) 2-tuples so
        # that duplicates and the original order are preserved.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.  Optional `maxheaderlen' is passed through to the Generator.

        This is a convenience method and may not generate the message exactly
        as you intend.  The Generator used here is created with
        mangle_from_=False, so lines that begin with "From " are not mangled.
        For more flexibility, use the flatten() method of a Generator
        instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the message's envelope header (Unix From_ line)."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the message's envelope header, or None if never set."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload; a TypeError is raised if i is
        given and the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or one
        of the uuencode spellings (`x-uuencode', `uuencode', `uue', `x-uue').
        If some other encoding is used, or the header is missing, or if the
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
        payload is returned undecoded, converted to bytes if it is a string.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        else:
            payload = self._payload[i]
        if not decode:
            return payload
        # Decoded payloads always return bytes.  XXX split this part out into
        # a new method called .get_decoded_payload().
        if self.is_multipart():
            return None
        cte = self.get('content-transfer-encoding', '').lower()
        if cte == 'quoted-printable':
            return utils._qdecode(payload)
        elif cte == 'base64':
            try:
                if isinstance(payload, str):
                    payload = payload.encode('raw-unicode-escape')
                return base64.b64decode(payload)
            except binascii.Error:
                # Incorrect padding; fall through and return the raw data.
                pass
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Only strings need encoding; a payload that is already bytes is
            # handed to the decoder as-is (mirrors the base64 branch).
            if isinstance(payload, str):
                payload = payload.encode('raw-unicode-escape')
            in_file = BytesIO(payload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem; fall through and return raw data.
                pass
        # Is there a better way to do this?  We can't use the bytes
        # constructor.
        if isinstance(payload, str):
            return payload.encode('raw-unicode-escape')
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # The body encoding is first tried as a callable taking the
            # message; if calling it raises TypeError it is used as the
            # header value instead.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header with this name (any case) is present."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, duplicates included."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.  The returned list is a copy.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(';' + value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a string like so:

            param = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(param)

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.

        When the resulting header value differs from the current one, every
        existing occurrence of the header is deleted and the new value is
        appended to the end of the header list.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written without the parameter or its value.
        When that changes the header's value, every existing occurrence of
        the header is deleted and the new value is appended to the end of the
        header list.  All values will be quoted as necessary unless requote
        is False.  Optional header specifies an alternative to the
        Content-Type header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Replace the existing Content-Type header with the new value.  The
        # new value is the same for every Content-Type header, so build it
        # once, using names distinct from the header loop's variables.
        parts = []
        for pname, pvalue in newparams:
            if pvalue == '':
                parts.append(pname)
            else:
                parts.append('%s=%s' % (pname, pvalue))
        new_value = SEMISPACE.join(parts)
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, new_value))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk