blob: 0a8d90be7ad692ed519750cd23eb4f82821bf5d7 [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw409a4c02002-04-10 21:01:31 +00008import warnings
Barry Warsawba925802001-09-23 03:17:28 +00009from cStringIO import StringIO
Barry Warsaw908dc4b2002-06-29 05:56:15 +000010from types import ListType, TupleType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000011
Barry Warsawba925802001-09-23 03:17:28 +000012# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000013from email import Errors
14from email import Utils
15from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000016
# Separator used when joining a header's main value and its parameters.
SEMISPACE = '; '

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29
Barry Warsaw908dc4b2002-06-29 05:56:15 +000030# Helper functions
Barry Warsaw409a4c02002-04-10 21:01:31 +000031def _formatparam(param, value=None, quote=1):
32 """Convenience function to format and return a key=value pair.
33
Barry Warsaw908dc4b2002-06-29 05:56:15 +000034 This will quote the value if needed or if quote is true.
Barry Warsaw409a4c02002-04-10 21:01:31 +000035 """
36 if value is not None and len(value) > 0:
Barry Warsaw908dc4b2002-06-29 05:56:15 +000037 # TupleType is used for RFC 2231 encoded parameter values where items
38 # are (charset, language, value). charset is a string, not a Charset
39 # instance.
40 if isinstance(value, TupleType):
41 # Convert to ascii, ignore language
42 value = unicode(value[2], value[0]).encode("ascii")
Barry Warsaw409a4c02002-04-10 21:01:31 +000043 # BAW: Please check this. I think that if quote is set it should
44 # force quoting even if not necessary.
45 if quote or tspecials.search(value):
46 return '%s="%s"' % (param, Utils.quote(value))
47 else:
48 return '%s=%s' % (param, value)
49 else:
50 return param
Barry Warsawbeb59452001-09-26 05:41:51 +000051
Barry Warsawba925802001-09-23 03:17:28 +000052
def _unquotevalue(value):
    """Return value with Utils.unquote() applied to its text.

    A plain value is passed straight to Utils.unquote().  A tuple is treated
    as (charset, language, text): only the third item is unquoted and a new
    3-tuple is returned with the first two items unchanged.
    """
    if not isinstance(value, TupleType):
        return Utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return (charset, language, Utils.unquote(text))
Barry Warsaw908dc4b2002-06-29 05:56:15 +000058
59
Barry Warsaw48b0d362002-08-27 22:34:44 +000060
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Headers are kept as a list of (name, value) pairs in insertion order;
    header names are compared case insensitively by every lookup method.
    """
    def __init__(self):
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.
        """
        # Imported at call time rather than at module level (presumably to
        # avoid a circular import between Message and Generator).
        from email.Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true if the message consists of multiple parts.

        A message is multipart exactly when its payload is a list.
        """
        if type(self._payload) is ListType:
            return 1
        return 0

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Store the Unix From_ envelope line for this message."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the stored Unix From_ envelope line (None if never set)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload.

        If the current payload is empty, then the current payload will be made
        a scalar, set to the given value.  If it is already a list, the new
        payload is appended.  Otherwise the existing scalar and the new
        payload are combined into a two-element list, unless the message's
        main type is something other than `multipart', in which case
        Errors.MultipartConversionError is raised.

        Deprecated: emits a DeprecationWarning; use attach() instead.
        """
        warnings.warn('add_payload() is deprecated, use attach() instead.',
                      DeprecationWarning, 2)
        if self._payload is None:
            self._payload = payload
        elif type(self._payload) is ListType:
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            self._payload = [self._payload, payload]

    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object
        (e.g. because you're attaching a message/rfc822 subpart), use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload.  A TypeError is
        raised if i is given but the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded).  If the message is a multipart and the
        decode flag is true, then None is returned.
        """
        if i is None:
            payload = self._payload
        elif type(self._payload) is not ListType:
            raise TypeError(i)
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optionally set the charset; when it is not None it is handed to
        set_charset(), so it may be a Charset instance or a string naming
        one.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a string or a Charset object.  If it is a string, it
        will be converted to a Charset object by calling Charset's
        constructor.  If charset is None, the charset parameter will be
        removed from the Content-Type: field.  Anything else will generate a
        TypeError.

        The message will be assumed to be a text message encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        # Strings are accepted as arguments to the Charset constructor.
        if isinstance(charset, StringType):
            charset = Charset.Charset(charset)
        if not isinstance(charset, Charset.Charset):
            raise TypeError(charset)
        self._charset = charset
        if not self.has_key('MIME-Version'):
            self.add_header('MIME-Version', '1.0')
        if not self.has_key('Content-Type'):
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if not self.has_key('Content-Transfer-Encoding'):
            cte = charset.get_body_encoding()
            # The body encoding is either a callable that is applied to this
            # message, or a value to store in the header.
            if callable(cte):
                cte(self)
            else:
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset object associated with the message's payload."""
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        newheaders = []
        for k, v in self._headers:
            if k.lower() != name:
                newheaders.append((k, v))
        self._headers = newheaders

    def __contains__(self, key):
        """Return true if a header named key (case insensitive) is present."""
        return key.lower() in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # A private sentinel distinguishes "missing" from any stored value.
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        The returned list is a copy of the internal header list.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  The first header whose name matches is returned.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        values = []
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                values.append(v)
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append(_formatparam(k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in zip(range(len(self._headers)), self._headers):
            if k.lower() == _name:
                # Keep the header's original spelling; only the value changes.
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # These methods are silently deprecated in favor of get_content_type() and
    # friends (see below).  They will be noisily deprecated in email 3.0.
    #

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        return paramre.split(value)[0].lower().strip()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present.

        failobj is returned when there is no Content-Type: header or when the
        type does not contain exactly one slash.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        if ctype.count('/') != 1:
            return failobj
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present.

        failobj is returned when there is no Content-Type: header or when the
        type does not contain exactly one slash.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        if ctype.count('/') != 1:
            return failobj
        return ctype.split('/')[1]

    #
    # Use these three methods instead of the three above.
    #

    def get_content_type(self):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, the default type as given by get_default_type()
        will be returned.  Since messages always have a default type this will
        always return a value.

        The current state of RFC standards define a message's default type to
        be text/plain unless it appears inside a multipart/digest container,
        in which case it would be message/rfc822.
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = paramre.split(value)[0].lower().strip()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Returns the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type(), i.e. everything before the first slash.
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub content type.

        This is the `subtype' part of the string returned by
        get_content_type().  When a Content-Type: header is present that
        string always contains exactly one slash; the default type set via
        set_default_type() is not validated, so a default without a slash
        makes this method fail with an IndexError.
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts then have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type: header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in paramre.split(value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = Utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=1):
        """Return the message's Content-Type: parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is always
        unquoted, unless unquote is set to a false value.

        Optional failobj is the object to return if there is no Content-Type:
        header.  Optional header is the header to search instead of
        Content-Type:.

        Note that failobj is the first positional parameter; pass header by
        keyword.
        """
        missing = []
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type', unquote=1):
        """Return the parameter value if found in the Content-Type: header.

        Optional failobj is the object to return if there is no Content-Type:
        header or if the parameter is not present in it.  Optional header is
        the header to search instead of Content-Type:

        Parameter keys are always compared case insensitively.  Values are
        always unquoted, unless unquote is set to a false value.
        """
        if not self.has_key(header):
            return failobj
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=1):
        """Set a parameter in the Content-Type: header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type: and has not yet been defined in this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended, as per RFC 2045.

        An alternate header can be specified in the header argument, and
        all parameters will be quoted as appropriate unless requote is
        set to a false value.

        When the resulting value differs from the current one, the header is
        deleted and re-added, so it moves to the end of the header list.
        """
        if not self.has_key(header) and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter absent (or empty): append it to the existing value.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Parameter present: rebuild the whole value, substituting the
            # new value in the position of the old one.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=1):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written without param or its value.  All values
        will be quoted as appropriate unless requote is set to a false value.
        Optional header specifies an alternative to the Content-Type: header.
        Nothing happens if the header is not present.
        """
        if not self.has_key(header):
            return
        new_ctype = ''
        # header must be passed by keyword: the first positional parameter of
        # get_params() is failobj, not header.
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=1):
        """Set the main type and subtype for the Content-Type: header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type: header, keeping all the
        parameters in place.  If requote is false, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternate header can be specified in the header argument.  When the
        Content-Type: header is set, we'll always also add a MIME-Version:
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError
        # Set the Content-Type: you get a MIME-Version:
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if not self.has_key(header):
            self[header] = type
            return
        # header must be passed by keyword: the first positional parameter of
        # get_params() is failobj, not header.
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted.  An RFC 2231 encoded
        filename is returned as a unicode string decoded with its charset.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        if isinstance(filename, TupleType):
            # It's an RFC 2231 encoded parameter
            newvalue = _unquotevalue(filename)
            return unicode(newvalue[2], newvalue[0])
        else:
            newvalue = _unquotevalue(filename.strip())
            return newvalue

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        return _unquotevalue(boundary.strip())

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        missing = []
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.
            raise Errors.HeaderParseError('No Content-Type: header found')
        newparams = []
        foundp = 0
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
                foundp = 1
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Replace the existing Content-Type: header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                parts = []
                # Distinct loop names so the outer header value `v' is not
                # shadowed while the parameters are re-joined.
                for pname, pvalue in newparams:
                    if pvalue == '':
                        parts.append(pname)
                    else:
                        parts.append('%s=%s' % (pname, pvalue))
                newheaders.append((h, SEMISPACE.join(parts)))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    # walk() is bound as a method from a version-specific helper module; the
    # 2.2 variant fails to compile on Python 2.1, hence the SyntaxError
    # fallback.
    try:
        from email._compat22 import walk
    except SyntaxError:
        # Must be using Python 2.1
        from email._compat21 import walk

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be the value of the charset parameter in the
        Content-Type: header of that part, or the value of the 'failobj'
        parameter (defaults to None) if that part has no Content-Type:
        header or no charset parameter.

        The list will contain one item for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_param('charset', failobj) for part in self.walk()]
737 return [part.get_param('charset', failobj) for part in self.walk()]