blob: 8b4f8bdb5b74619a103df74a4119d610dc71fe2e [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw409a4c02002-04-10 21:01:31 +00008import warnings
Barry Warsawba925802001-09-23 03:17:28 +00009from cStringIO import StringIO
Barry Warsaw908dc4b2002-06-29 05:56:15 +000010from types import ListType, TupleType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000011
Barry Warsawba925802001-09-23 03:17:28 +000012# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000013from email import Errors
14from email import Utils
15from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000016
# Separator used when joining a header's main value and its parameters
# into a single header string.
SEMISPACE = '; '

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29
Barry Warsaw908dc4b2002-06-29 05:56:15 +000030# Helper functions
def _formatparam(param, value=None, quote=1):
    """Format a header parameter as `param=value' and return the string.

    When value is None or empty, only the bare parameter name is returned.
    Otherwise the value is wrapped in double quotes if quote is true or if
    the value contains any character matched by tspecials; if neither holds
    it is emitted unquoted.
    """
    # Nothing to attach: a bare attribute.
    if value is None or not len(value) > 0:
        return param
    # A tuple is an RFC 2231 encoded parameter value of the form
    # (charset, language, value), where charset is a string rather than a
    # Charset instance.  Convert it to ascii; the language is ignored.
    if isinstance(value, TupleType):
        value = unicode(value[2], value[0]).encode("ascii")
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    must_quote = quote or tspecials.search(value)
    if not must_quote:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
Barry Warsawbeb59452001-09-26 05:41:51 +000051
Barry Warsawba925802001-09-23 03:17:28 +000052
def _unquotevalue(value):
    """Return value with its quoting removed via Utils.unquote().

    A plain value is unquoted directly.  A tuple is treated as a
    (charset, language, value) triple: only the third item is unquoted and
    a new 3-tuple is returned with the first two items unchanged.
    """
    if not isinstance(value, TupleType):
        return Utils.unquote(value)
    unquoted = Utils.unquote(value[2])
    return (value[0], value[1], unquoted)
Barry Warsaw908dc4b2002-06-29 05:56:15 +000058
59
Tim Peters280488b2002-08-23 18:19:30 +000060
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Headers are kept as a list of (name, value) 2-tuples in the order they
    were added; header names are compared case insensitively.
    """
    def __init__(self):
        # List of (name, value) tuples, in insertion order
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from email.Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true if the message consists of multiple parts.

        A message is considered multipart exactly when its payload is a
        list.
        """
        if type(self._payload) is ListType:
            return 1
        return 0

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the message's Unix From_ envelope line to unixfrom."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the message's Unix From_ envelope line (None if unset)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload (deprecated).

        If the current payload is empty, then the current payload will be
        made a scalar, set to the given value.  If it is already a list, the
        new payload is appended.  Otherwise the existing scalar and the new
        payload are combined into a list, which is only permitted when the
        main content type is "multipart" or missing; if it is anything else,
        MultipartConversionError is raised.

        Issues a DeprecationWarning; use attach() instead.
        """
        warnings.warn('add_payload() is deprecated, use attach() instead.',
                      DeprecationWarning, 2)
        if self._payload is None:
            self._payload = payload
        elif type(self._payload) is ListType:
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            self._payload = [self._payload, payload]

    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this
        method is called.  If you want to set the payload to a scalar object
        (e.g. because you're attaching a message/rfc822 subpart), use
        set_payload() instead.

        Note that an existing payload is assumed to already be a list; it is
        appended to as is.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload.  TypeError is raised
        if i is given and the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded).  If the message is a multipart and the
        decode flag is true, then None is returned.
        """
        if i is None:
            payload = self._payload
        elif type(self._payload) is not ListType:
            raise TypeError(i)
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset, when not None, is passed on to set_charset(), so
        it may be either a Charset instance or a string naming a character
        set.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a string or a Charset object.  If it is a string, it
        will be converted to a Charset object by calling Charset's
        constructor.  If charset is None, the charset parameter will be
        removed from the Content-Type: field.  Anything else will generate a
        TypeError.

        The message will be assumed to be a text message encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if isinstance(charset, StringType):
            charset = Charset.Charset(charset)
        if not isinstance(charset, Charset.Charset):
            raise TypeError(charset)
        self._charset = charset
        if not self.has_key('MIME-Version'):
            self.add_header('MIME-Version', '1.0')
        if not self.has_key('Content-Type'):
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if not self.has_key('Content-Transfer-Encoding'):
            # The body encoding is either a callable that is handed the
            # message itself, or a value to store in the header directly.
            cte = charset.get_body_encoding()
            if callable(cte):
                cte(self)
            else:
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset object associated with the message's payload."""
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, key):
        """Return true if a header named key is present (case insensitive)."""
        return key.lower() in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # A fresh list is a unique sentinel, so a header whose value happens
        # to be None is still reported as present.
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        The returned list is a copy; modifying it does not affect the
        message.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  The first header whose name matches (case insensitively)
        is the one returned.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add.  Keyword arguments can be used to
        set additional parameters for the header field, with underscores
        converted to dashes.  Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added.  If _value is None, the header consists of the parameters
        only.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')

        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    #
    # These methods are silently deprecated in favor of get_content_type() and
    # friends (see below).  They will be noisily deprecated in email 3.0.
    #

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        return paramre.split(value)[0].lower().strip()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present.

        failobj is returned if there is no Content-Type: header or if the
        content type does not contain exactly one slash.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        if ctype.count('/') != 1:
            return failobj
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present.

        failobj is returned if there is no Content-Type: header or if the
        content type does not contain exactly one slash.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        if ctype.count('/') != 1:
            return failobj
        return ctype.split('/')[1]

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, the default type as given by
        get_default_type() will be returned.  Since messages always have a
        default type this will always return a value.

        The current state of RFC standards define a message's default type to
        be text/plain unless it appears inside a multipart/digest container,
        in which case it would be message/rfc822.
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = paramre.split(value)[0].lower().strip()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Returns the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts then have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type: header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in paramre.split(value):
            try:
                name, val = p.split('=', 1)
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            else:
                name = name.strip()
                val = val.strip()
            params.append((name, val))
        params = Utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=1):
        """Return the message's Content-Type: parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is always
        unquoted, unless unquote is set to a false value.

        Optional failobj is the object to return if there is no Content-Type:
        header.  Optional header is the header to search instead of
        Content-Type:.

        Note that failobj is the first positional argument; pass header by
        keyword.
        """
        missing = []
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type', unquote=1):
        """Return the parameter value if found in the Content-Type: header.

        Optional failobj is the object to return if there is no Content-Type:
        header, or if the header has no such parameter.  Optional header is
        the header to search instead of Content-Type:

        Parameter keys are always compared case insensitively.  Values are
        always unquoted, unless unquote is set to a false value.
        """
        if not self.has_key(header):
            return failobj
        wanted = param.lower()
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == wanted:
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=1):
        """Set a parameter in the Content-Type: header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type: and has not yet been defined in this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended, as per RFC 2045.

        An alternate header can be specified in the header argument, and
        all parameters will be quoted as appropriate unless requote is
        set to a false value.

        When the resulting header value differs from the current one, the
        header is deleted and re-added, so it moves to the end of the header
        list.
        """
        if not self.has_key(header) and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present (or has an empty value): append it.
            new_item = _formatparam(param, value, requote)
            if not ctype:
                ctype = new_item
            else:
                ctype = SEMISPACE.join([ctype, new_item])
        else:
            # Rebuild the header value, substituting the new value for the
            # existing parameter and keeping everything else in order.
            ctype = ''
            wanted = param.lower()
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == wanted:
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=1):
        """Remove the given parameter completely from the Content-Type header.

        The header value is rebuilt without param or its value.  All values
        will be quoted as appropriate unless requote is set to a false
        value.  Optional header is the header to operate on instead of
        Content-Type:.  Nothing happens if the header is missing.

        When the rebuilt value differs from the current one, the header is
        deleted and re-added, so it moves to the end of the header list.
        """
        if not self.has_key(header):
            return
        new_ctype = ''
        wanted = param.lower()
        # header must be passed by keyword: the first positional argument of
        # get_params() is failobj, not header.
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != wanted:
                item = _formatparam(p, v, requote)
                if not new_ctype:
                    new_ctype = item
                else:
                    new_ctype = SEMISPACE.join([new_ctype, item])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=1):
        """Set the main type and subtype for the Content-Type: header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type: header, keeping all the
        parameters in place.  If requote is false, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternate header can be specified in the header argument.  When the
        Content-Type: header is set, we'll always also add a MIME-Version:
        header.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError
        # Set the Content-Type: you get a MIME-Version:
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if not self.has_key(header):
            self[header] = type
            return
        # header must be passed by keyword: the first positional argument of
        # get_params() is failobj, not header.
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted.  failobj is returned if
        the header or the parameter is missing.  An RFC 2231 encoded
        filename is returned as a unicode string decoded with its declared
        charset.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        if isinstance(filename, TupleType):
            # It's an RFC 2231 encoded parameter
            newvalue = _unquotevalue(filename)
            return unicode(newvalue[2], newvalue[0])
        else:
            newvalue = _unquotevalue(filename.strip())
            return newvalue

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted.  failobj is returned if the header or
        the parameter is missing.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        return _unquotevalue(boundary.strip())

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        missing = []
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.
            raise Errors.HeaderParseError('No Content-Type: header found')
        newparams = []
        foundp = 0
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = 1
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Build the new header value once; values keep their original
        # quoting, and bare attributes are emitted without an `=' sign.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        newvalue = SEMISPACE.join(parts)
        # Replace the existing Content-Type: header with the new value,
        # leaving every header at its current position.
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, newvalue))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    # walk() is provided by a version-specific compatibility module and
    # becomes a method of this class.
    try:
        from email._compat22 import walk
    except SyntaxError:
        # Must be using Python 2.1
        from email._compat21 import walk

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type: header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part has no
        Content-Type: header or the charset is not defined.

        The list will contain one item for each part yielded by walk().
        """
        return [part.get_param('charset', failobj) for part in self.walk()]