blob: 16ae12082eea94ed891cc3f077a862457a86bce5 [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw409a4c02002-04-10 21:01:31 +00008import warnings
Barry Warsawba925802001-09-23 03:17:28 +00009from cStringIO import StringIO
Barry Warsaw908dc4b2002-06-29 05:56:15 +000010from types import ListType, TupleType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000011
Barry Warsawba925802001-09-23 03:17:28 +000012# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000013from email import Errors
14from email import Utils
15from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000016
Barry Warsawbeb59452001-09-26 05:41:51 +000017SEMISPACE = '; '
Barry Warsaw409a4c02002-04-10 21:01:31 +000018
Barry Warsawc4945492002-09-28 20:40:25 +000019try:
20 True, False
21except NameError:
22 True = 1
23 False = 0
24
Barry Warsaw409a4c02002-04-10 21:01:31 +000025# Regular expression used to split header parameters. BAW: this may be too
26# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
27# most headers found in the wild. We may eventually need a full fledged
28# parser eventually.
Barry Warsaw2539cf52001-10-25 22:43:46 +000029paramre = re.compile(r'\s*;\s*')
Barry Warsaw409a4c02002-04-10 21:01:31 +000030# Regular expression that matches `special' characters in parameters, the
31# existance of which force quoting of the parameter value.
32tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
33
34
35
Barry Warsaw908dc4b2002-06-29 05:56:15 +000036# Helper functions
Barry Warsawc4945492002-09-28 20:40:25 +000037def _formatparam(param, value=None, quote=True):
Barry Warsaw409a4c02002-04-10 21:01:31 +000038 """Convenience function to format and return a key=value pair.
39
Barry Warsaw908dc4b2002-06-29 05:56:15 +000040 This will quote the value if needed or if quote is true.
Barry Warsaw409a4c02002-04-10 21:01:31 +000041 """
42 if value is not None and len(value) > 0:
Barry Warsaw908dc4b2002-06-29 05:56:15 +000043 # TupleType is used for RFC 2231 encoded parameter values where items
44 # are (charset, language, value). charset is a string, not a Charset
45 # instance.
46 if isinstance(value, TupleType):
Barry Warsaw3c255352002-09-06 03:55:04 +000047 # Encode as per RFC 2231
48 param += '*'
49 value = Utils.encode_rfc2231(value[2], value[0], value[1])
Barry Warsaw409a4c02002-04-10 21:01:31 +000050 # BAW: Please check this. I think that if quote is set it should
51 # force quoting even if not necessary.
52 if quote or tspecials.search(value):
53 return '%s="%s"' % (param, Utils.quote(value))
54 else:
55 return '%s=%s' % (param, value)
56 else:
57 return param
Barry Warsawbeb59452001-09-26 05:41:51 +000058
Barry Warsawba925802001-09-23 03:17:28 +000059
Barry Warsaw908dc4b2002-06-29 05:56:15 +000060def _unquotevalue(value):
61 if isinstance(value, TupleType):
Barry Warsaw15aefa92002-09-26 17:19:34 +000062 return value[0], value[1], Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000063 else:
Tim Peters280488b2002-08-23 18:19:30 +000064 return Utils.unquote(value)
Barry Warsaw908dc4b2002-06-29 05:56:15 +000065
66
Barry Warsaw48b0d362002-08-27 22:34:44 +000067
Barry Warsawba925802001-09-23 03:17:28 +000068class Message:
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000069 """Basic message object.
Barry Warsawba925802001-09-23 03:17:28 +000070
71 A message object is defined as something that has a bunch of RFC 2822
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000072 headers and a payload. It may optionally have an envelope header
73 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
74 multipart or a message/rfc822), then the payload is a list of Message
75 objects, otherwise it is a string.
Barry Warsawba925802001-09-23 03:17:28 +000076
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000077 Message objects implement part of the `mapping' interface, which assumes
Barry Warsawba925802001-09-23 03:17:28 +000078 there is exactly one occurrance of the header per message. Some headers
Barry Warsawc4945492002-09-28 20:40:25 +000079 do in fact appear multiple times (e.g. Received) and for those headers,
Barry Warsawba925802001-09-23 03:17:28 +000080 you must use the explicit API to set or get all the headers. Not all of
81 the mapping methods are implemented.
Barry Warsawba925802001-09-23 03:17:28 +000082 """
83 def __init__(self):
84 self._headers = []
85 self._unixfrom = None
86 self._payload = None
Barry Warsaw409a4c02002-04-10 21:01:31 +000087 self._charset = None
Barry Warsawba925802001-09-23 03:17:28 +000088 # Defaults for multipart messages
89 self.preamble = self.epilogue = None
Barry Warsawa0c8b9d2002-07-09 02:46:12 +000090 # Default content type
91 self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +000092
93 def __str__(self):
94 """Return the entire formatted message as a string.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000095 This includes the headers, body, and envelope header.
Barry Warsawba925802001-09-23 03:17:28 +000096 """
Barry Warsawc4945492002-09-28 20:40:25 +000097 return self.as_string(unixfrom=True)
Barry Warsawba925802001-09-23 03:17:28 +000098
Barry Warsawc4945492002-09-28 20:40:25 +000099 def as_string(self, unixfrom=False):
Barry Warsawba925802001-09-23 03:17:28 +0000100 """Return the entire formatted message as a string.
Barry Warsawc4945492002-09-28 20:40:25 +0000101 Optional `unixfrom' when True, means include the Unix From_ envelope
Barry Warsawba925802001-09-23 03:17:28 +0000102 header.
103 """
Barry Warsaw8ba76e82002-06-02 19:05:51 +0000104 from email.Generator import Generator
Barry Warsawba925802001-09-23 03:17:28 +0000105 fp = StringIO()
106 g = Generator(fp)
Barry Warsaw8ba76e82002-06-02 19:05:51 +0000107 g.flatten(self, unixfrom=unixfrom)
Barry Warsawba925802001-09-23 03:17:28 +0000108 return fp.getvalue()
109
110 def is_multipart(self):
Barry Warsawc4945492002-09-28 20:40:25 +0000111 """Return True if the message consists of multiple parts."""
Barry Warsaw4ece7782002-09-28 20:41:39 +0000112 if isinstance(self._payload, ListType):
Barry Warsawc4945492002-09-28 20:40:25 +0000113 return True
114 return False
Barry Warsawba925802001-09-23 03:17:28 +0000115
116 #
117 # Unix From_ line
118 #
119 def set_unixfrom(self, unixfrom):
120 self._unixfrom = unixfrom
121
122 def get_unixfrom(self):
123 return self._unixfrom
124
125 #
126 # Payload manipulation.
127 #
128 def add_payload(self, payload):
129 """Add the given payload to the current payload.
130
131 If the current payload is empty, then the current payload will be made
132 a scalar, set to the given value.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000133
134 Note: This method is deprecated. Use .attach() instead.
Barry Warsawba925802001-09-23 03:17:28 +0000135 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000136 warnings.warn('add_payload() is deprecated, use attach() instead.',
137 DeprecationWarning, 2)
Barry Warsawba925802001-09-23 03:17:28 +0000138 if self._payload is None:
139 self._payload = payload
Barry Warsawc4945492002-09-28 20:40:25 +0000140 elif isinstance(self._payload, ListType):
Barry Warsawba925802001-09-23 03:17:28 +0000141 self._payload.append(payload)
142 elif self.get_main_type() not in (None, 'multipart'):
143 raise Errors.MultipartConversionError(
Barry Warsawc4945492002-09-28 20:40:25 +0000144 'Message main content type must be "multipart" or missing')
Barry Warsawba925802001-09-23 03:17:28 +0000145 else:
146 self._payload = [self._payload, payload]
147
Barry Warsaw409a4c02002-04-10 21:01:31 +0000148 def attach(self, payload):
149 """Add the given payload to the current payload.
150
151 The current payload will always be a list of objects after this method
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000152 is called. If you want to set the payload to a scalar object, use
Barry Warsaw409a4c02002-04-10 21:01:31 +0000153 set_payload() instead.
154 """
155 if self._payload is None:
156 self._payload = [payload]
157 else:
158 self._payload.append(payload)
Barry Warsawba925802001-09-23 03:17:28 +0000159
Barry Warsawc4945492002-09-28 20:40:25 +0000160 def get_payload(self, i=None, decode=False):
Barry Warsawfbcde752002-09-11 14:11:35 +0000161 """Return a reference to the payload.
Barry Warsawba925802001-09-23 03:17:28 +0000162
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000163 The payload will either be a list object or a string. If you mutate
164 the list object, you modify the message's payload in place. Optional
165 i returns that index into the payload.
Barry Warsawba925802001-09-23 03:17:28 +0000166
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000167 Optional decode is a flag (defaulting to False) indicating whether the
168 payload should be decoded or not, according to the
169 Content-Transfer-Encoding header. When True and the message is not a
170 multipart, the payload will be decoded if this header's value is
171 `quoted-printable' or `base64'. If some other encoding is used, or
172 the header is missing, the payload is returned as-is (undecoded). If
173 the message is a multipart and the decode flag is True, then None is
174 returned.
Barry Warsawba925802001-09-23 03:17:28 +0000175 """
176 if i is None:
177 payload = self._payload
Barry Warsawc4945492002-09-28 20:40:25 +0000178 elif not isinstance(self._payload, ListType):
Barry Warsawba925802001-09-23 03:17:28 +0000179 raise TypeError, i
180 else:
181 payload = self._payload[i]
182 if decode:
183 if self.is_multipart():
184 return None
185 cte = self.get('content-transfer-encoding', '')
186 if cte.lower() == 'quoted-printable':
187 return Utils._qdecode(payload)
188 elif cte.lower() == 'base64':
189 return Utils._bdecode(payload)
190 # Everything else, including encodings with 8bit or 7bit are returned
191 # unchanged.
192 return payload
193
Barry Warsaw409a4c02002-04-10 21:01:31 +0000194 def set_payload(self, payload, charset=None):
195 """Set the payload to the given value.
Barry Warsawba925802001-09-23 03:17:28 +0000196
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000197 Optional charset sets the message's default character set. See
198 set_charset() for details.
199 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000200 self._payload = payload
201 if charset is not None:
202 self.set_charset(charset)
203
204 def set_charset(self, charset):
205 """Set the charset of the payload to a given character set.
206
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000207 charset can be a Charset instance, a string naming a character set, or
208 None. If it is a string it will be converted to a Charset instance.
209 If charset is None, the charset parameter will be removed from the
210 Content-Type field. Anything else will generate a TypeError.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000211
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000212 The message will be assumed to be of type text/* encoded with
Barry Warsaw409a4c02002-04-10 21:01:31 +0000213 charset.input_charset. It will be converted to charset.output_charset
214 and encoded properly, if needed, when generating the plain text
215 representation of the message. MIME headers (MIME-Version,
216 Content-Type, Content-Transfer-Encoding) will be added as needed.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000217
Barry Warsaw409a4c02002-04-10 21:01:31 +0000218 """
219 if charset is None:
220 self.del_param('charset')
221 self._charset = None
222 return
223 if isinstance(charset, StringType):
224 charset = Charset.Charset(charset)
225 if not isinstance(charset, Charset.Charset):
226 raise TypeError, charset
227 # BAW: should we accept strings that can serve as arguments to the
228 # Charset constructor?
229 self._charset = charset
230 if not self.has_key('MIME-Version'):
231 self.add_header('MIME-Version', '1.0')
232 if not self.has_key('Content-Type'):
233 self.add_header('Content-Type', 'text/plain',
234 charset=charset.get_output_charset())
235 else:
236 self.set_param('charset', charset.get_output_charset())
237 if not self.has_key('Content-Transfer-Encoding'):
238 cte = charset.get_body_encoding()
239 if callable(cte):
240 cte(self)
241 else:
242 self.add_header('Content-Transfer-Encoding', cte)
243
244 def get_charset(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000245 """Return the Charset instance associated with the message's payload.
246 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000247 return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000248
Barry Warsawba925802001-09-23 03:17:28 +0000249 #
250 # MAPPING INTERFACE (partial)
251 #
252 def __len__(self):
Barry Warsawbeb59452001-09-26 05:41:51 +0000253 """Return the total number of headers, including duplicates."""
Barry Warsawba925802001-09-23 03:17:28 +0000254 return len(self._headers)
255
256 def __getitem__(self, name):
257 """Get a header value.
258
259 Return None if the header is missing instead of raising an exception.
260
261 Note that if the header appeared multiple times, exactly which
262 occurrance gets returned is undefined. Use getall() to get all
263 the values matching a header field name.
264 """
265 return self.get(name)
266
267 def __setitem__(self, name, val):
268 """Set the value of a header.
269
270 Note: this does not overwrite an existing header with the same field
271 name. Use __delitem__() first to delete any existing headers.
272 """
273 self._headers.append((name, val))
274
275 def __delitem__(self, name):
276 """Delete all occurrences of a header, if present.
277
278 Does not raise an exception if the header is missing.
279 """
280 name = name.lower()
281 newheaders = []
282 for k, v in self._headers:
283 if k.lower() <> name:
284 newheaders.append((k, v))
285 self._headers = newheaders
286
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000287 def __contains__(self, name):
288 return name.lower() in [k.lower() for k, v in self._headers]
Barry Warsawba925802001-09-23 03:17:28 +0000289
290 def has_key(self, name):
291 """Return true if the message contains the header."""
Barry Warsawbeb59452001-09-26 05:41:51 +0000292 missing = []
293 return self.get(name, missing) is not missing
Barry Warsawba925802001-09-23 03:17:28 +0000294
295 def keys(self):
296 """Return a list of all the message's header field names.
297
298 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000299 message, or were added to the message, and may contain duplicates.
300 Any fields deleted and re-inserted are always appended to the header
301 list.
Barry Warsawba925802001-09-23 03:17:28 +0000302 """
303 return [k for k, v in self._headers]
304
305 def values(self):
306 """Return a list of all the message's header values.
307
308 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000309 message, or were added to the message, and may contain duplicates.
310 Any fields deleted and re-inserted are always appended to the header
311 list.
Barry Warsawba925802001-09-23 03:17:28 +0000312 """
313 return [v for k, v in self._headers]
314
315 def items(self):
316 """Get all the message's header fields and values.
317
318 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000319 message, or were added to the message, and may contain duplicates.
320 Any fields deleted and re-inserted are always appended to the header
321 list.
Barry Warsawba925802001-09-23 03:17:28 +0000322 """
323 return self._headers[:]
324
325 def get(self, name, failobj=None):
326 """Get a header value.
327
328 Like __getitem__() but return failobj instead of None when the field
329 is missing.
330 """
331 name = name.lower()
332 for k, v in self._headers:
333 if k.lower() == name:
334 return v
335 return failobj
336
337 #
338 # Additional useful stuff
339 #
340
341 def get_all(self, name, failobj=None):
342 """Return a list of all the values for the named field.
343
344 These will be sorted in the order they appeared in the original
345 message, and may contain duplicates. Any fields deleted and
Greg Ward6253c2d2001-11-24 15:49:53 +0000346 re-inserted are always appended to the header list.
Barry Warsaw9300a752001-10-09 15:48:29 +0000347
348 If no such fields exist, failobj is returned (defaults to None).
Barry Warsawba925802001-09-23 03:17:28 +0000349 """
350 values = []
351 name = name.lower()
352 for k, v in self._headers:
353 if k.lower() == name:
354 values.append(v)
Barry Warsaw9300a752001-10-09 15:48:29 +0000355 if not values:
356 return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000357 return values
358
359 def add_header(self, _name, _value, **_params):
360 """Extended header setting.
361
362 name is the header field to add. keyword arguments can be used to set
363 additional parameters for the header field, with underscores converted
364 to dashes. Normally the parameter will be added as key="value" unless
365 value is None, in which case only the key will be added.
366
367 Example:
368
369 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
Barry Warsawba925802001-09-23 03:17:28 +0000370 """
371 parts = []
372 for k, v in _params.items():
373 if v is None:
374 parts.append(k.replace('_', '-'))
375 else:
Barry Warsaw409a4c02002-04-10 21:01:31 +0000376 parts.append(_formatparam(k.replace('_', '-'), v))
Barry Warsawba925802001-09-23 03:17:28 +0000377 if _value is not None:
378 parts.insert(0, _value)
379 self._headers.append((_name, SEMISPACE.join(parts)))
380
Barry Warsaw229727f2002-09-06 03:38:12 +0000381 def replace_header(self, _name, _value):
382 """Replace a header.
383
384 Replace the first matching header found in the message, retaining
385 header order and case. If no matching header was found, a KeyError is
386 raised.
387 """
388 _name = _name.lower()
389 for i, (k, v) in zip(range(len(self._headers)), self._headers):
390 if k.lower() == _name:
391 self._headers[i] = (k, _value)
392 break
393 else:
394 raise KeyError, _name
395
Barry Warsawc1068642002-07-19 22:24:55 +0000396 #
397 # These methods are silently deprecated in favor of get_content_type() and
398 # friends (see below). They will be noisily deprecated in email 3.0.
399 #
400
Barry Warsawba925802001-09-23 03:17:28 +0000401 def get_type(self, failobj=None):
402 """Returns the message's content type.
403
404 The returned string is coerced to lowercase and returned as a single
Barry Warsawc4945492002-09-28 20:40:25 +0000405 string of the form `maintype/subtype'. If there was no Content-Type
Barry Warsawba925802001-09-23 03:17:28 +0000406 header in the message, failobj is returned (defaults to None).
407 """
408 missing = []
409 value = self.get('content-type', missing)
410 if value is missing:
411 return failobj
Barry Warsaw7aeac912002-07-18 23:09:09 +0000412 return paramre.split(value)[0].lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000413
414 def get_main_type(self, failobj=None):
415 """Return the message's main content type if present."""
416 missing = []
417 ctype = self.get_type(missing)
418 if ctype is missing:
419 return failobj
Barry Warsawc1068642002-07-19 22:24:55 +0000420 if ctype.count('/') <> 1:
421 return failobj
422 return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000423
424 def get_subtype(self, failobj=None):
425 """Return the message's content subtype if present."""
426 missing = []
427 ctype = self.get_type(missing)
428 if ctype is missing:
429 return failobj
Barry Warsawc1068642002-07-19 22:24:55 +0000430 if ctype.count('/') <> 1:
431 return failobj
432 return ctype.split('/')[1]
433
434 #
435 # Use these three methods instead of the three above.
436 #
437
438 def get_content_type(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000439 """Return the message's content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000440
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000441 The returned string is coerced to lower case of the form
442 `maintype/subtype'. If there was no Content-Type header in the
443 message, the default type as given by get_default_type() will be
444 returned. Since according to RFC 2045, messages always have a default
445 type this will always return a value.
Barry Warsawc1068642002-07-19 22:24:55 +0000446
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000447 RFC 2045 defines a message's default type to be text/plain unless it
448 appears inside a multipart/digest container, in which case it would be
449 message/rfc822.
Barry Warsawc1068642002-07-19 22:24:55 +0000450 """
451 missing = []
452 value = self.get('content-type', missing)
453 if value is missing:
454 # This should have no parameters
455 return self.get_default_type()
Barry Warsawf36d8042002-08-20 14:50:09 +0000456 ctype = paramre.split(value)[0].lower().strip()
457 # RFC 2045, section 5.2 says if its invalid, use text/plain
458 if ctype.count('/') <> 1:
459 return 'text/plain'
460 return ctype
Barry Warsawc1068642002-07-19 22:24:55 +0000461
462 def get_content_maintype(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000463 """Return the message's main content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000464
465 This is the `maintype' part of the string returned by
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000466 get_content_type().
Barry Warsawc1068642002-07-19 22:24:55 +0000467 """
468 ctype = self.get_content_type()
Barry Warsawc1068642002-07-19 22:24:55 +0000469 return ctype.split('/')[0]
470
471 def get_content_subtype(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000472 """Returns the message's sub-content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000473
474 This is the `subtype' part of the string returned by
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000475 get_content_type().
Barry Warsawc1068642002-07-19 22:24:55 +0000476 """
477 ctype = self.get_content_type()
Barry Warsawc1068642002-07-19 22:24:55 +0000478 return ctype.split('/')[1]
Barry Warsawba925802001-09-23 03:17:28 +0000479
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000480 def get_default_type(self):
481 """Return the `default' content type.
482
483 Most messages have a default content type of text/plain, except for
484 messages that are subparts of multipart/digest containers. Such
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000485 subparts have a default content type of message/rfc822.
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000486 """
487 return self._default_type
488
489 def set_default_type(self, ctype):
490 """Set the `default' content type.
491
Barry Warsawc1068642002-07-19 22:24:55 +0000492 ctype should be either "text/plain" or "message/rfc822", although this
493 is not enforced. The default content type is not stored in the
Barry Warsawc4945492002-09-28 20:40:25 +0000494 Content-Type header.
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000495 """
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000496 self._default_type = ctype
497
Barry Warsawbeb59452001-09-26 05:41:51 +0000498 def _get_params_preserve(self, failobj, header):
499 # Like get_params() but preserves the quoting of values. BAW:
500 # should this be part of the public interface?
501 missing = []
502 value = self.get(header, missing)
503 if value is missing:
504 return failobj
505 params = []
506 for p in paramre.split(value):
507 try:
508 name, val = p.split('=', 1)
Barry Warsaw7aeac912002-07-18 23:09:09 +0000509 name = name.strip()
510 val = val.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000511 except ValueError:
512 # Must have been a bare attribute
Barry Warsaw7aeac912002-07-18 23:09:09 +0000513 name = p.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000514 val = ''
515 params.append((name, val))
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000516 params = Utils.decode_params(params)
Barry Warsawbeb59452001-09-26 05:41:51 +0000517 return params
518
Barry Warsawc4945492002-09-28 20:40:25 +0000519 def get_params(self, failobj=None, header='content-type', unquote=True):
520 """Return the message's Content-Type parameters, as a list.
Barry Warsawba925802001-09-23 03:17:28 +0000521
Barry Warsawbeb59452001-09-26 05:41:51 +0000522 The elements of the returned list are 2-tuples of key/value pairs, as
523 split on the `=' sign. The left hand side of the `=' is the key,
524 while the right hand side is the value. If there is no `=' sign in
Barry Warsaw15aefa92002-09-26 17:19:34 +0000525 the parameter the value is the empty string. The value is as
526 described in the get_param() method.
Barry Warsawbeb59452001-09-26 05:41:51 +0000527
Barry Warsawc4945492002-09-28 20:40:25 +0000528 Optional failobj is the object to return if there is no Content-Type
Barry Warsawba925802001-09-23 03:17:28 +0000529 header. Optional header is the header to search instead of
Barry Warsawc4945492002-09-28 20:40:25 +0000530 Content-Type. If unquote is True, the value is unquoted.
Barry Warsawba925802001-09-23 03:17:28 +0000531 """
532 missing = []
Barry Warsawbeb59452001-09-26 05:41:51 +0000533 params = self._get_params_preserve(missing, header)
534 if params is missing:
Barry Warsawba925802001-09-23 03:17:28 +0000535 return failobj
Barry Warsaw409a4c02002-04-10 21:01:31 +0000536 if unquote:
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000537 return [(k, _unquotevalue(v)) for k, v in params]
Barry Warsaw409a4c02002-04-10 21:01:31 +0000538 else:
539 return params
Barry Warsawba925802001-09-23 03:17:28 +0000540
Barry Warsawc4945492002-09-28 20:40:25 +0000541 def get_param(self, param, failobj=None, header='content-type',
542 unquote=True):
543 """Return the parameter value if found in the Content-Type header.
Barry Warsawba925802001-09-23 03:17:28 +0000544
Barry Warsawc4945492002-09-28 20:40:25 +0000545 Optional failobj is the object to return if there is no Content-Type
Barry Warsaw15aefa92002-09-26 17:19:34 +0000546 header, or the Content-Type header has no such parameter. Optional
Barry Warsawc4945492002-09-28 20:40:25 +0000547 header is the header to search instead of Content-Type.
Barry Warsawbeb59452001-09-26 05:41:51 +0000548
Barry Warsaw15aefa92002-09-26 17:19:34 +0000549 Parameter keys are always compared case insensitively. The return
550 value can either be a string, or a 3-tuple if the parameter was RFC
551 2231 encoded. When it's a 3-tuple, the elements of the value are of
552 the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
553 string. Your application should be prepared to deal with these, and
554 can convert the parameter to a Unicode string like so:
555
556 param = msg.get_param('foo')
557 if isinstance(param, tuple):
558 param = unicode(param[2], param[0])
559
560 In any case, the parameter value (either the returned string, or the
561 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
Barry Warsawc4945492002-09-28 20:40:25 +0000562 to False.
Barry Warsawba925802001-09-23 03:17:28 +0000563 """
Barry Warsawbeb59452001-09-26 05:41:51 +0000564 if not self.has_key(header):
Barry Warsawba925802001-09-23 03:17:28 +0000565 return failobj
Barry Warsawbeb59452001-09-26 05:41:51 +0000566 for k, v in self._get_params_preserve(failobj, header):
567 if k.lower() == param.lower():
Barry Warsaw409a4c02002-04-10 21:01:31 +0000568 if unquote:
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000569 return _unquotevalue(v)
Barry Warsaw409a4c02002-04-10 21:01:31 +0000570 else:
571 return v
Barry Warsawba925802001-09-23 03:17:28 +0000572 return failobj
573
Barry Warsawc4945492002-09-28 20:40:25 +0000574 def set_param(self, param, value, header='Content-Type', requote=True,
Barry Warsaw3c255352002-09-06 03:55:04 +0000575 charset=None, language=''):
Barry Warsawc4945492002-09-28 20:40:25 +0000576 """Set a parameter in the Content-Type header.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000577
578 If the parameter already exists in the header, its value will be
579 replaced with the new value.
580
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000581 If header is Content-Type and has not yet been defined for this
Barry Warsaw409a4c02002-04-10 21:01:31 +0000582 message, it will be set to "text/plain" and the new parameter and
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000583 value will be appended as per RFC 2045.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000584
Barry Warsawc4945492002-09-28 20:40:25 +0000585 An alternate header can specified in the header argument, and all
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000586 parameters will be quoted as necessary unless requote is False.
Barry Warsaw3c255352002-09-06 03:55:04 +0000587
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000588 If charset is specified, the parameter will be encoded according to RFC
589 2231. Optional language specifies the RFC 2231 language, defaulting
590 to the empty string. Both charset and language should be strings.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000591 """
Barry Warsaw3c255352002-09-06 03:55:04 +0000592 if not isinstance(value, TupleType) and charset:
593 value = (charset, language, value)
594
Barry Warsaw409a4c02002-04-10 21:01:31 +0000595 if not self.has_key(header) and header.lower() == 'content-type':
596 ctype = 'text/plain'
597 else:
598 ctype = self.get(header)
599 if not self.get_param(param, header=header):
600 if not ctype:
601 ctype = _formatparam(param, value, requote)
602 else:
603 ctype = SEMISPACE.join(
604 [ctype, _formatparam(param, value, requote)])
605 else:
606 ctype = ''
607 for old_param, old_value in self.get_params(header=header,
608 unquote=requote):
609 append_param = ''
610 if old_param.lower() == param.lower():
611 append_param = _formatparam(param, value, requote)
612 else:
613 append_param = _formatparam(old_param, old_value, requote)
614 if not ctype:
615 ctype = append_param
616 else:
617 ctype = SEMISPACE.join([ctype, append_param])
618 if ctype <> self.get(header):
619 del self[header]
620 self[header] = ctype
621
Barry Warsawc4945492002-09-28 20:40:25 +0000622 def del_param(self, param, header='content-type', requote=True):
Barry Warsaw409a4c02002-04-10 21:01:31 +0000623 """Remove the given parameter completely from the Content-Type header.
624
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000625 The header will be re-written in place without the parameter or its
626 value. All values will be quoted as necessary unless requote is
627 False. Optional header specifies an alternative to the Content-Type
628 header.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000629 """
630 if not self.has_key(header):
631 return
632 new_ctype = ''
633 for p, v in self.get_params(header, unquote=requote):
634 if p.lower() <> param.lower():
635 if not new_ctype:
636 new_ctype = _formatparam(p, v, requote)
637 else:
638 new_ctype = SEMISPACE.join([new_ctype,
639 _formatparam(p, v, requote)])
640 if new_ctype <> self.get(header):
641 del self[header]
642 self[header] = new_ctype
643
Barry Warsawc4945492002-09-28 20:40:25 +0000644 def set_type(self, type, header='Content-Type', requote=True):
645 """Set the main type and subtype for the Content-Type header.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000646
647 type must be a string in the form "maintype/subtype", otherwise a
648 ValueError is raised.
649
Barry Warsawc4945492002-09-28 20:40:25 +0000650 This method replaces the Content-Type header, keeping all the
651 parameters in place. If requote is False, this leaves the existing
Barry Warsaw409a4c02002-04-10 21:01:31 +0000652 header's quoting as is. Otherwise, the parameters will be quoted (the
653 default).
654
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000655 An alternative header can be specified in the header argument. When
656 the Content-Type header is set, we'll always also add a MIME-Version
Barry Warsaw409a4c02002-04-10 21:01:31 +0000657 header.
658 """
659 # BAW: should we be strict?
660 if not type.count('/') == 1:
661 raise ValueError
Barry Warsawc4945492002-09-28 20:40:25 +0000662 # Set the Content-Type, you get a MIME-Version
Barry Warsaw409a4c02002-04-10 21:01:31 +0000663 if header.lower() == 'content-type':
664 del self['mime-version']
665 self['MIME-Version'] = '1.0'
666 if not self.has_key(header):
667 self[header] = type
668 return
669 params = self.get_params(header, unquote=requote)
670 del self[header]
671 self[header] = type
672 # Skip the first param; it's the old type.
673 for p, v in params[1:]:
674 self.set_param(p, v, header, requote)
675
Barry Warsawba925802001-09-23 03:17:28 +0000676 def get_filename(self, failobj=None):
677 """Return the filename associated with the payload if present.
678
Barry Warsawc4945492002-09-28 20:40:25 +0000679 The filename is extracted from the Content-Disposition header's
Barry Warsawba925802001-09-23 03:17:28 +0000680 `filename' parameter, and it is unquoted.
681 """
682 missing = []
683 filename = self.get_param('filename', missing, 'content-disposition')
684 if filename is missing:
685 return failobj
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000686 if isinstance(filename, TupleType):
687 # It's an RFC 2231 encoded parameter
688 newvalue = _unquotevalue(filename)
689 return unicode(newvalue[2], newvalue[0])
690 else:
691 newvalue = _unquotevalue(filename.strip())
692 return newvalue
Barry Warsawba925802001-09-23 03:17:28 +0000693
694 def get_boundary(self, failobj=None):
695 """Return the boundary associated with the payload if present.
696
Barry Warsawc4945492002-09-28 20:40:25 +0000697 The boundary is extracted from the Content-Type header's `boundary'
Barry Warsawba925802001-09-23 03:17:28 +0000698 parameter, and it is unquoted.
699 """
700 missing = []
701 boundary = self.get_param('boundary', missing)
702 if boundary is missing:
703 return failobj
Barry Warsaw15aefa92002-09-26 17:19:34 +0000704 if isinstance(boundary, TupleType):
705 # RFC 2231 encoded, so decode. It better end up as ascii
706 return unicode(boundary[2], boundary[0]).encode('us-ascii')
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000707 return _unquotevalue(boundary.strip())
Barry Warsawba925802001-09-23 03:17:28 +0000708
709 def set_boundary(self, boundary):
Barry Warsawc4945492002-09-28 20:40:25 +0000710 """Set the boundary parameter in Content-Type to 'boundary'.
Barry Warsawba925802001-09-23 03:17:28 +0000711
Barry Warsawc4945492002-09-28 20:40:25 +0000712 This is subtly different than deleting the Content-Type header and
Barry Warsawba925802001-09-23 03:17:28 +0000713 adding a new one with a new boundary parameter via add_header(). The
714 main difference is that using the set_boundary() method preserves the
Barry Warsawc4945492002-09-28 20:40:25 +0000715 order of the Content-Type header in the original message.
Barry Warsawba925802001-09-23 03:17:28 +0000716
Barry Warsawc4945492002-09-28 20:40:25 +0000717 HeaderParseError is raised if the message has no Content-Type header.
Barry Warsawba925802001-09-23 03:17:28 +0000718 """
Barry Warsawbeb59452001-09-26 05:41:51 +0000719 missing = []
720 params = self._get_params_preserve(missing, 'content-type')
721 if params is missing:
Barry Warsawc4945492002-09-28 20:40:25 +0000722 # There was no Content-Type header, and we don't know what type
Barry Warsawba925802001-09-23 03:17:28 +0000723 # to set it to, so raise an exception.
Barry Warsawc4945492002-09-28 20:40:25 +0000724 raise Errors.HeaderParseError, 'No Content-Type header found'
Barry Warsawba925802001-09-23 03:17:28 +0000725 newparams = []
Barry Warsawc4945492002-09-28 20:40:25 +0000726 foundp = False
Barry Warsawbeb59452001-09-26 05:41:51 +0000727 for pk, pv in params:
728 if pk.lower() == 'boundary':
729 newparams.append(('boundary', '"%s"' % boundary))
Barry Warsawc4945492002-09-28 20:40:25 +0000730 foundp = True
Barry Warsawba925802001-09-23 03:17:28 +0000731 else:
Barry Warsawbeb59452001-09-26 05:41:51 +0000732 newparams.append((pk, pv))
Barry Warsawba925802001-09-23 03:17:28 +0000733 if not foundp:
Barry Warsawc4945492002-09-28 20:40:25 +0000734 # The original Content-Type header had no boundary attribute.
Barry Warsawba925802001-09-23 03:17:28 +0000735 # Tack one one the end. BAW: should we raise an exception
736 # instead???
Barry Warsawbeb59452001-09-26 05:41:51 +0000737 newparams.append(('boundary', '"%s"' % boundary))
Barry Warsawc4945492002-09-28 20:40:25 +0000738 # Replace the existing Content-Type header with the new value
Barry Warsawba925802001-09-23 03:17:28 +0000739 newheaders = []
740 for h, v in self._headers:
741 if h.lower() == 'content-type':
Barry Warsawbeb59452001-09-26 05:41:51 +0000742 parts = []
743 for k, v in newparams:
744 if v == '':
745 parts.append(k)
746 else:
747 parts.append('%s=%s' % (k, v))
748 newheaders.append((h, SEMISPACE.join(parts)))
749
Barry Warsawba925802001-09-23 03:17:28 +0000750 else:
751 newheaders.append((h, v))
752 self._headers = newheaders
753
Barry Warsaw8c1aac22002-05-19 23:44:19 +0000754 try:
755 from email._compat22 import walk
756 except SyntaxError:
757 # Must be using Python 2.1
758 from email._compat21 import walk
Barry Warsawba925802001-09-23 03:17:28 +0000759
Barry Warsaw15aefa92002-09-26 17:19:34 +0000760 def get_content_charset(self, failobj=None):
761 """Return the charset parameter of the Content-Type header.
762
Barry Warsawee07cb12002-10-10 15:13:26 +0000763 The returned string is always coerced to lower case. If there is no
764 Content-Type header, or if that header has no charset parameter,
765 failobj is returned.
Barry Warsaw15aefa92002-09-26 17:19:34 +0000766 """
767 missing = []
768 charset = self.get_param('charset', missing)
769 if charset is missing:
770 return failobj
771 if isinstance(charset, TupleType):
772 # RFC 2231 encoded, so decode it, and it better end up as ascii.
Barry Warsawee07cb12002-10-10 15:13:26 +0000773 charset = unicode(charset[2], charset[0]).encode('us-ascii')
774 # RFC 2046, $4.1.2 says charsets are not case sensitive
775 return charset.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000776
Barry Warsawba925802001-09-23 03:17:28 +0000777 def get_charsets(self, failobj=None):
778 """Return a list containing the charset(s) used in this message.
Tim Peters527e64f2001-10-04 05:36:56 +0000779
Barry Warsawc4945492002-09-28 20:40:25 +0000780 The returned list of items describes the Content-Type headers'
Barry Warsawba925802001-09-23 03:17:28 +0000781 charset parameter for this message and all the subparts in its
782 payload.
783
784 Each item will either be a string (the value of the charset parameter
Barry Warsawc4945492002-09-28 20:40:25 +0000785 in the Content-Type header of that part) or the value of the
Barry Warsawba925802001-09-23 03:17:28 +0000786 'failobj' parameter (defaults to None), if the part does not have a
787 main MIME type of "text", or the charset is not defined.
788
789 The list will contain one string for each part of the message, plus
790 one for the container message (i.e. self), so that a non-multipart
791 message will still return a list of length 1.
792 """
Barry Warsaw15aefa92002-09-26 17:19:34 +0000793 return [part.get_content_charset(failobj) for part in self.walk()]