# Source blob: 9ef2363e625fb3bffa62569958cc83c87e75809f
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Basic message object for the email package object model."""

# Only the Message class is exported by ``from <module> import *``.
__all__ = ['Message']

import re
import uu
import base64
import binascii
import warnings
from io import BytesIO, StringIO

# Intrapackage imports
from email import utils
from email import errors
from email.charset import Charset

# Separator placed between a header's main value and each of its parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')


# Helper functions
Benjamin Peterson4cd6a952008-08-17 20:23:46 +000029def _splitparam(param):
30 # Split header parameters. BAW: this may be too simple. It isn't
31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
32 # found in the wild. We may eventually need a full fledged parser
33 # eventually.
34 a, sep, b = param.partition(';')
35 if not sep:
36 return a.strip(), None
37 return a.strip(), b.strip()
38
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039def _formatparam(param, value=None, quote=True):
40 """Convenience function to format and return a key=value pair.
41
R. David Murrayccb9d052010-12-13 23:57:01 +000042 This will quote the value if needed or if quote is true. If value is a
43 three tuple (charset, language, value), it will be encoded according
44 to RFC2231 rules. If it contains non-ascii characters it will likewise
45 be encoded according to RFC2231 rules, using the utf-8 charset and
46 a null language.
Guido van Rossum8b3febe2007-08-30 01:15:14 +000047 """
48 if value is not None and len(value) > 0:
49 # A tuple is used for RFC 2231 encoded parameter values where items
50 # are (charset, language, value). charset is a string, not a Charset
51 # instance.
52 if isinstance(value, tuple):
53 # Encode as per RFC 2231
54 param += '*'
55 value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murrayccb9d052010-12-13 23:57:01 +000056 else:
57 try:
58 value.encode('ascii')
59 except UnicodeEncodeError:
60 param += '*'
61 value = utils.encode_rfc2231(value, 'utf-8', '')
Guido van Rossum8b3febe2007-08-30 01:15:14 +000062 # BAW: Please check this. I think that if quote is set it should
63 # force quoting even if not necessary.
64 if quote or tspecials.search(value):
65 return '%s="%s"' % (param, utils.quote(value))
66 else:
67 return '%s=%s' % (param, value)
68 else:
69 return param
70
71def _parseparam(s):
72 plist = []
73 while s[:1] == ';':
74 s = s[1:]
75 end = s.find(';')
R. David Murray84ee3102010-04-14 19:05:38 +000076 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum8b3febe2007-08-30 01:15:14 +000077 end = s.find(';', end + 1)
78 if end < 0:
79 end = len(s)
80 f = s[:end]
81 if '=' in f:
82 i = f.index('=')
83 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
84 plist.append(f.strip())
85 s = s[end:]
86 return plist
87
88
89def _unquotevalue(value):
90 # This is different than utils.collapse_rfc2231_value() because it doesn't
91 # try to convert the value to a unicode. Message.get_param() and
92 # Message.get_params() are both currently defined to return the tuple in
93 # the face of RFC 2231 parameters.
94 if isinstance(value, tuple):
95 return value[0], value[1], utils.unquote(value[2])
96 else:
97 return utils.unquote(value)
98
99
100
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """

    def __init__(self):
        # Ordered list of (name, value) pairs; duplicates are allowed.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.

        Equivalent to calling as_string() with its default arguments.
        """
        return self.as_string()

    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.  Optional `maxheaderlen' is passed through to the Generator.

        This is a convenience method and may not generate the message exactly
        as you intend.  Lines that begin with "From " are not mangled
        (the Generator is created with mangle_from_=False).  For more
        flexibility, use the flatten() method of a Generator instance.
        """
        # Imported at call time rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Store the Unix From_ envelope header line."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the stored Unix From_ envelope header line (None if unset)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload; a TypeError is raised if i is
        given and the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or one
        of the uuencode spellings.  If some other encoding is used, or the
        header is missing, or if the payload has bogus data (i.e. bogus base64
        or uuencoded data), the payload is returned as-is, except that a
        string payload is converted to bytes with the raw-unicode-escape
        codec.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        else:
            payload = self._payload[i]
        if not decode:
            return payload
        # XXX split the decoding part out into a new method called
        # .get_decoded_payload().
        if self.is_multipart():
            return None
        cte = self.get('content-transfer-encoding', '').lower()
        if cte == 'quoted-printable':
            return utils._qdecode(payload)
        if cte == 'base64':
            if isinstance(payload, str):
                payload = payload.encode('raw-unicode-escape')
            try:
                return base64.b64decode(payload)
            except binascii.Error:
                # Incorrect padding; fall through and return the raw bytes.
                pass
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # The payload may already be bytes, so only encode strings.
            if isinstance(payload, str):
                payload = payload.encode('raw-unicode-escape')
            in_file = BytesIO(payload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem; fall through and return raw bytes.
                pass
        # Is there a better way to do this?  We can't use the bytes
        # constructor.
        if isinstance(payload, str):
            return payload.encode('raw-unicode-escape')
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # The body encoding may be a callable that is handed the
                # message itself.
                cte(self)
            except TypeError:
                # Not callable: encode the body here and record the encoding
                # in the header.
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return True if a header called name exists (case-insensitive)."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def __iter__(self):
        """Iterate over the header field names, duplicates included."""
        for field, value in self._headers:
            yield field

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.  The returned list is a copy of the internal header list.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  The name is matched case-insensitively and the value of
        the first matching header is returned.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        """Return header's parameters as (name, value) pairs, quoting intact.

        failobj is returned when the header is missing.  A bare attribute
        (no `=') gets the empty string as its value.  The pairs are passed
        through utils.decode_params() before being returned.
        """
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(';' + value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a string like so:

            param = msg.get_param('foo')
            if isinstance(param, tuple):
                param = email.utils.collapse_rfc2231_value(param)

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        wanted = param.lower()
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == wanted:
                if unquote:
                    return _unquotevalue(v)
                return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present (or present with a false value): append.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the whole header value, swapping in the new value.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be of the form "maintype/subtype": %r' % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted))
        # The new value does not depend on which Content-Type header is
        # being replaced, so build it once.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        new_value = SEMISPACE.join(parts)
        # Replace the existing Content-Type header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, new_value))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.  failobj is also returned when the charset
        value contains characters outside the us-ascii range.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                as_bytes = charset[2].encode('raw-unicode-escape')
                charset = str(as_bytes, pcharset)
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk