blob: 4c6b3dae9b2fc4d1c9c6139c9070e42697afe74e [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsawba925802001-09-23 03:17:28 +00002# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw21191d32003-03-10 16:13:14 +00008import binascii
Barry Warsaw409a4c02002-04-10 21:01:31 +00009import warnings
Barry Warsawba925802001-09-23 03:17:28 +000010from cStringIO import StringIO
Barry Warsaw908dc4b2002-06-29 05:56:15 +000011from types import ListType, TupleType, StringType
Barry Warsawba925802001-09-23 03:17:28 +000012
Barry Warsawba925802001-09-23 03:17:28 +000013# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000014from email import Utils
Barry Warsaw21191d32003-03-10 16:13:14 +000015from email import Errors
Barry Warsaw8ba76e82002-06-02 19:05:51 +000016from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000017
# Separator placed between a header's main value and each of its parameters.
SEMISPACE = '; '

# Supply True/False on interpreters that do not define them as builtins.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
34
35
36
Barry Warsaw908dc4b2002-06-29 05:56:15 +000037# Helper functions
Barry Warsawc4945492002-09-28 20:40:25 +000038def _formatparam(param, value=None, quote=True):
Barry Warsaw409a4c02002-04-10 21:01:31 +000039 """Convenience function to format and return a key=value pair.
40
Barry Warsaw908dc4b2002-06-29 05:56:15 +000041 This will quote the value if needed or if quote is true.
Barry Warsaw409a4c02002-04-10 21:01:31 +000042 """
43 if value is not None and len(value) > 0:
Barry Warsaw908dc4b2002-06-29 05:56:15 +000044 # TupleType is used for RFC 2231 encoded parameter values where items
45 # are (charset, language, value). charset is a string, not a Charset
46 # instance.
47 if isinstance(value, TupleType):
Barry Warsaw3c255352002-09-06 03:55:04 +000048 # Encode as per RFC 2231
49 param += '*'
50 value = Utils.encode_rfc2231(value[2], value[0], value[1])
Barry Warsaw409a4c02002-04-10 21:01:31 +000051 # BAW: Please check this. I think that if quote is set it should
52 # force quoting even if not necessary.
53 if quote or tspecials.search(value):
54 return '%s="%s"' % (param, Utils.quote(value))
55 else:
56 return '%s=%s' % (param, value)
57 else:
58 return param
Barry Warsawbeb59452001-09-26 05:41:51 +000059
Barry Warsawba925802001-09-23 03:17:28 +000060
def _unquotevalue(value):
    """Return value with Utils.unquote() applied to its text.

    A plain value is unquoted directly.  For a tuple (the RFC 2231
    (charset, language, value) form) only the third item is unquoted and a
    new 3-tuple is returned.
    """
    if not isinstance(value, TupleType):
        return Utils.unquote(value)
    return value[0], value[1], Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000066
67
Barry Warsaw48b0d362002-08-27 22:34:44 +000068
Barry Warsawba925802001-09-23 03:17:28 +000069class Message:
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000070 """Basic message object.
Barry Warsawba925802001-09-23 03:17:28 +000071
72 A message object is defined as something that has a bunch of RFC 2822
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000073 headers and a payload. It may optionally have an envelope header
74 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
75 multipart or a message/rfc822), then the payload is a list of Message
76 objects, otherwise it is a string.
Barry Warsawba925802001-09-23 03:17:28 +000077
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000078 Message objects implement part of the `mapping' interface, which assumes
Barry Warsawba925802001-09-23 03:17:28 +000079 there is exactly one occurrance of the header per message. Some headers
Barry Warsawc4945492002-09-28 20:40:25 +000080 do in fact appear multiple times (e.g. Received) and for those headers,
Barry Warsawba925802001-09-23 03:17:28 +000081 you must use the explicit API to set or get all the headers. Not all of
82 the mapping methods are implemented.
Barry Warsawba925802001-09-23 03:17:28 +000083 """
84 def __init__(self):
85 self._headers = []
86 self._unixfrom = None
87 self._payload = None
Barry Warsaw409a4c02002-04-10 21:01:31 +000088 self._charset = None
Barry Warsawba925802001-09-23 03:17:28 +000089 # Defaults for multipart messages
90 self.preamble = self.epilogue = None
Barry Warsawa0c8b9d2002-07-09 02:46:12 +000091 # Default content type
92 self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +000093
94 def __str__(self):
95 """Return the entire formatted message as a string.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000096 This includes the headers, body, and envelope header.
Barry Warsawba925802001-09-23 03:17:28 +000097 """
Barry Warsawc4945492002-09-28 20:40:25 +000098 return self.as_string(unixfrom=True)
Barry Warsawba925802001-09-23 03:17:28 +000099
Barry Warsawc4945492002-09-28 20:40:25 +0000100 def as_string(self, unixfrom=False):
Barry Warsawba925802001-09-23 03:17:28 +0000101 """Return the entire formatted message as a string.
Barry Warsawc4945492002-09-28 20:40:25 +0000102 Optional `unixfrom' when True, means include the Unix From_ envelope
Barry Warsawba925802001-09-23 03:17:28 +0000103 header.
104 """
Barry Warsaw8ba76e82002-06-02 19:05:51 +0000105 from email.Generator import Generator
Barry Warsawba925802001-09-23 03:17:28 +0000106 fp = StringIO()
107 g = Generator(fp)
Barry Warsaw8ba76e82002-06-02 19:05:51 +0000108 g.flatten(self, unixfrom=unixfrom)
Barry Warsawba925802001-09-23 03:17:28 +0000109 return fp.getvalue()
110
111 def is_multipart(self):
Barry Warsawc4945492002-09-28 20:40:25 +0000112 """Return True if the message consists of multiple parts."""
Barry Warsaw4ece7782002-09-28 20:41:39 +0000113 if isinstance(self._payload, ListType):
Barry Warsawc4945492002-09-28 20:40:25 +0000114 return True
115 return False
Barry Warsawba925802001-09-23 03:17:28 +0000116
117 #
118 # Unix From_ line
119 #
120 def set_unixfrom(self, unixfrom):
121 self._unixfrom = unixfrom
122
123 def get_unixfrom(self):
124 return self._unixfrom
125
126 #
127 # Payload manipulation.
128 #
129 def add_payload(self, payload):
130 """Add the given payload to the current payload.
131
132 If the current payload is empty, then the current payload will be made
133 a scalar, set to the given value.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000134
135 Note: This method is deprecated. Use .attach() instead.
Barry Warsawba925802001-09-23 03:17:28 +0000136 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000137 warnings.warn('add_payload() is deprecated, use attach() instead.',
138 DeprecationWarning, 2)
Barry Warsawba925802001-09-23 03:17:28 +0000139 if self._payload is None:
140 self._payload = payload
Barry Warsawc4945492002-09-28 20:40:25 +0000141 elif isinstance(self._payload, ListType):
Barry Warsawba925802001-09-23 03:17:28 +0000142 self._payload.append(payload)
143 elif self.get_main_type() not in (None, 'multipart'):
144 raise Errors.MultipartConversionError(
Barry Warsawc4945492002-09-28 20:40:25 +0000145 'Message main content type must be "multipart" or missing')
Barry Warsawba925802001-09-23 03:17:28 +0000146 else:
147 self._payload = [self._payload, payload]
148
Barry Warsaw409a4c02002-04-10 21:01:31 +0000149 def attach(self, payload):
150 """Add the given payload to the current payload.
151
152 The current payload will always be a list of objects after this method
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000153 is called. If you want to set the payload to a scalar object, use
Barry Warsaw409a4c02002-04-10 21:01:31 +0000154 set_payload() instead.
155 """
156 if self._payload is None:
157 self._payload = [payload]
158 else:
159 self._payload.append(payload)
Barry Warsawba925802001-09-23 03:17:28 +0000160
Barry Warsawc4945492002-09-28 20:40:25 +0000161 def get_payload(self, i=None, decode=False):
Barry Warsawfbcde752002-09-11 14:11:35 +0000162 """Return a reference to the payload.
Barry Warsawba925802001-09-23 03:17:28 +0000163
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000164 The payload will either be a list object or a string. If you mutate
165 the list object, you modify the message's payload in place. Optional
166 i returns that index into the payload.
Barry Warsawba925802001-09-23 03:17:28 +0000167
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000168 Optional decode is a flag (defaulting to False) indicating whether the
169 payload should be decoded or not, according to the
170 Content-Transfer-Encoding header. When True and the message is not a
171 multipart, the payload will be decoded if this header's value is
172 `quoted-printable' or `base64'. If some other encoding is used, or
Barry Warsaw21191d32003-03-10 16:13:14 +0000173 the header is missing, or if the payload has bogus base64 data, the
174 payload is returned as-is (undecoded).
175
176 If the message is a multipart and the decode flag is True, then None
177 is returned.
Barry Warsawba925802001-09-23 03:17:28 +0000178 """
179 if i is None:
180 payload = self._payload
Barry Warsawc4945492002-09-28 20:40:25 +0000181 elif not isinstance(self._payload, ListType):
Barry Warsawba925802001-09-23 03:17:28 +0000182 raise TypeError, i
183 else:
184 payload = self._payload[i]
185 if decode:
186 if self.is_multipart():
187 return None
188 cte = self.get('content-transfer-encoding', '')
189 if cte.lower() == 'quoted-printable':
190 return Utils._qdecode(payload)
191 elif cte.lower() == 'base64':
Barry Warsaw21191d32003-03-10 16:13:14 +0000192 try:
193 return Utils._bdecode(payload)
194 except binascii.Error:
195 # Incorrect padding
196 return payload
Barry Warsawba925802001-09-23 03:17:28 +0000197 # Everything else, including encodings with 8bit or 7bit are returned
198 # unchanged.
199 return payload
200
Barry Warsaw409a4c02002-04-10 21:01:31 +0000201 def set_payload(self, payload, charset=None):
202 """Set the payload to the given value.
Barry Warsawba925802001-09-23 03:17:28 +0000203
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000204 Optional charset sets the message's default character set. See
205 set_charset() for details.
206 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000207 self._payload = payload
208 if charset is not None:
209 self.set_charset(charset)
210
211 def set_charset(self, charset):
212 """Set the charset of the payload to a given character set.
213
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000214 charset can be a Charset instance, a string naming a character set, or
215 None. If it is a string it will be converted to a Charset instance.
216 If charset is None, the charset parameter will be removed from the
217 Content-Type field. Anything else will generate a TypeError.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000218
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000219 The message will be assumed to be of type text/* encoded with
Barry Warsaw409a4c02002-04-10 21:01:31 +0000220 charset.input_charset. It will be converted to charset.output_charset
221 and encoded properly, if needed, when generating the plain text
222 representation of the message. MIME headers (MIME-Version,
223 Content-Type, Content-Transfer-Encoding) will be added as needed.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000224
Barry Warsaw409a4c02002-04-10 21:01:31 +0000225 """
226 if charset is None:
227 self.del_param('charset')
228 self._charset = None
229 return
230 if isinstance(charset, StringType):
231 charset = Charset.Charset(charset)
232 if not isinstance(charset, Charset.Charset):
233 raise TypeError, charset
234 # BAW: should we accept strings that can serve as arguments to the
235 # Charset constructor?
236 self._charset = charset
237 if not self.has_key('MIME-Version'):
238 self.add_header('MIME-Version', '1.0')
239 if not self.has_key('Content-Type'):
240 self.add_header('Content-Type', 'text/plain',
241 charset=charset.get_output_charset())
242 else:
243 self.set_param('charset', charset.get_output_charset())
244 if not self.has_key('Content-Transfer-Encoding'):
245 cte = charset.get_body_encoding()
246 if callable(cte):
247 cte(self)
248 else:
249 self.add_header('Content-Transfer-Encoding', cte)
250
251 def get_charset(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000252 """Return the Charset instance associated with the message's payload.
253 """
Barry Warsaw409a4c02002-04-10 21:01:31 +0000254 return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000255
Barry Warsawba925802001-09-23 03:17:28 +0000256 #
257 # MAPPING INTERFACE (partial)
258 #
259 def __len__(self):
Barry Warsawbeb59452001-09-26 05:41:51 +0000260 """Return the total number of headers, including duplicates."""
Barry Warsawba925802001-09-23 03:17:28 +0000261 return len(self._headers)
262
263 def __getitem__(self, name):
264 """Get a header value.
265
266 Return None if the header is missing instead of raising an exception.
267
268 Note that if the header appeared multiple times, exactly which
269 occurrance gets returned is undefined. Use getall() to get all
270 the values matching a header field name.
271 """
272 return self.get(name)
273
274 def __setitem__(self, name, val):
275 """Set the value of a header.
276
277 Note: this does not overwrite an existing header with the same field
278 name. Use __delitem__() first to delete any existing headers.
279 """
280 self._headers.append((name, val))
281
282 def __delitem__(self, name):
283 """Delete all occurrences of a header, if present.
284
285 Does not raise an exception if the header is missing.
286 """
287 name = name.lower()
288 newheaders = []
289 for k, v in self._headers:
290 if k.lower() <> name:
291 newheaders.append((k, v))
292 self._headers = newheaders
293
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000294 def __contains__(self, name):
295 return name.lower() in [k.lower() for k, v in self._headers]
Barry Warsawba925802001-09-23 03:17:28 +0000296
297 def has_key(self, name):
298 """Return true if the message contains the header."""
Barry Warsawbeb59452001-09-26 05:41:51 +0000299 missing = []
300 return self.get(name, missing) is not missing
Barry Warsawba925802001-09-23 03:17:28 +0000301
302 def keys(self):
303 """Return a list of all the message's header field names.
304
305 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000306 message, or were added to the message, and may contain duplicates.
307 Any fields deleted and re-inserted are always appended to the header
308 list.
Barry Warsawba925802001-09-23 03:17:28 +0000309 """
310 return [k for k, v in self._headers]
311
312 def values(self):
313 """Return a list of all the message's header values.
314
315 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000316 message, or were added to the message, and may contain duplicates.
317 Any fields deleted and re-inserted are always appended to the header
318 list.
Barry Warsawba925802001-09-23 03:17:28 +0000319 """
320 return [v for k, v in self._headers]
321
322 def items(self):
323 """Get all the message's header fields and values.
324
325 These will be sorted in the order they appeared in the original
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000326 message, or were added to the message, and may contain duplicates.
327 Any fields deleted and re-inserted are always appended to the header
328 list.
Barry Warsawba925802001-09-23 03:17:28 +0000329 """
330 return self._headers[:]
331
332 def get(self, name, failobj=None):
333 """Get a header value.
334
335 Like __getitem__() but return failobj instead of None when the field
336 is missing.
337 """
338 name = name.lower()
339 for k, v in self._headers:
340 if k.lower() == name:
341 return v
342 return failobj
343
344 #
345 # Additional useful stuff
346 #
347
348 def get_all(self, name, failobj=None):
349 """Return a list of all the values for the named field.
350
351 These will be sorted in the order they appeared in the original
352 message, and may contain duplicates. Any fields deleted and
Greg Ward6253c2d2001-11-24 15:49:53 +0000353 re-inserted are always appended to the header list.
Barry Warsaw9300a752001-10-09 15:48:29 +0000354
355 If no such fields exist, failobj is returned (defaults to None).
Barry Warsawba925802001-09-23 03:17:28 +0000356 """
357 values = []
358 name = name.lower()
359 for k, v in self._headers:
360 if k.lower() == name:
361 values.append(v)
Barry Warsaw9300a752001-10-09 15:48:29 +0000362 if not values:
363 return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000364 return values
365
366 def add_header(self, _name, _value, **_params):
367 """Extended header setting.
368
369 name is the header field to add. keyword arguments can be used to set
370 additional parameters for the header field, with underscores converted
371 to dashes. Normally the parameter will be added as key="value" unless
372 value is None, in which case only the key will be added.
373
374 Example:
375
376 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
Barry Warsawba925802001-09-23 03:17:28 +0000377 """
378 parts = []
379 for k, v in _params.items():
380 if v is None:
381 parts.append(k.replace('_', '-'))
382 else:
Barry Warsaw409a4c02002-04-10 21:01:31 +0000383 parts.append(_formatparam(k.replace('_', '-'), v))
Barry Warsawba925802001-09-23 03:17:28 +0000384 if _value is not None:
385 parts.insert(0, _value)
386 self._headers.append((_name, SEMISPACE.join(parts)))
387
Barry Warsaw229727f2002-09-06 03:38:12 +0000388 def replace_header(self, _name, _value):
389 """Replace a header.
390
391 Replace the first matching header found in the message, retaining
392 header order and case. If no matching header was found, a KeyError is
393 raised.
394 """
395 _name = _name.lower()
396 for i, (k, v) in zip(range(len(self._headers)), self._headers):
397 if k.lower() == _name:
398 self._headers[i] = (k, _value)
399 break
400 else:
401 raise KeyError, _name
402
Barry Warsawc1068642002-07-19 22:24:55 +0000403 #
404 # These methods are silently deprecated in favor of get_content_type() and
405 # friends (see below). They will be noisily deprecated in email 3.0.
406 #
407
Barry Warsawba925802001-09-23 03:17:28 +0000408 def get_type(self, failobj=None):
409 """Returns the message's content type.
410
411 The returned string is coerced to lowercase and returned as a single
Barry Warsawc4945492002-09-28 20:40:25 +0000412 string of the form `maintype/subtype'. If there was no Content-Type
Barry Warsawba925802001-09-23 03:17:28 +0000413 header in the message, failobj is returned (defaults to None).
414 """
415 missing = []
416 value = self.get('content-type', missing)
417 if value is missing:
418 return failobj
Barry Warsaw7aeac912002-07-18 23:09:09 +0000419 return paramre.split(value)[0].lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000420
421 def get_main_type(self, failobj=None):
422 """Return the message's main content type if present."""
423 missing = []
424 ctype = self.get_type(missing)
425 if ctype is missing:
426 return failobj
Barry Warsawc1068642002-07-19 22:24:55 +0000427 if ctype.count('/') <> 1:
428 return failobj
429 return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000430
431 def get_subtype(self, failobj=None):
432 """Return the message's content subtype if present."""
433 missing = []
434 ctype = self.get_type(missing)
435 if ctype is missing:
436 return failobj
Barry Warsawc1068642002-07-19 22:24:55 +0000437 if ctype.count('/') <> 1:
438 return failobj
439 return ctype.split('/')[1]
440
441 #
442 # Use these three methods instead of the three above.
443 #
444
445 def get_content_type(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000446 """Return the message's content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000447
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000448 The returned string is coerced to lower case of the form
449 `maintype/subtype'. If there was no Content-Type header in the
450 message, the default type as given by get_default_type() will be
451 returned. Since according to RFC 2045, messages always have a default
452 type this will always return a value.
Barry Warsawc1068642002-07-19 22:24:55 +0000453
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000454 RFC 2045 defines a message's default type to be text/plain unless it
455 appears inside a multipart/digest container, in which case it would be
456 message/rfc822.
Barry Warsawc1068642002-07-19 22:24:55 +0000457 """
458 missing = []
459 value = self.get('content-type', missing)
460 if value is missing:
461 # This should have no parameters
462 return self.get_default_type()
Barry Warsawf36d8042002-08-20 14:50:09 +0000463 ctype = paramre.split(value)[0].lower().strip()
464 # RFC 2045, section 5.2 says if its invalid, use text/plain
465 if ctype.count('/') <> 1:
466 return 'text/plain'
467 return ctype
Barry Warsawc1068642002-07-19 22:24:55 +0000468
469 def get_content_maintype(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000470 """Return the message's main content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000471
472 This is the `maintype' part of the string returned by
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000473 get_content_type().
Barry Warsawc1068642002-07-19 22:24:55 +0000474 """
475 ctype = self.get_content_type()
Barry Warsawc1068642002-07-19 22:24:55 +0000476 return ctype.split('/')[0]
477
478 def get_content_subtype(self):
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000479 """Returns the message's sub-content type.
Barry Warsawc1068642002-07-19 22:24:55 +0000480
481 This is the `subtype' part of the string returned by
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000482 get_content_type().
Barry Warsawc1068642002-07-19 22:24:55 +0000483 """
484 ctype = self.get_content_type()
Barry Warsawc1068642002-07-19 22:24:55 +0000485 return ctype.split('/')[1]
Barry Warsawba925802001-09-23 03:17:28 +0000486
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000487 def get_default_type(self):
488 """Return the `default' content type.
489
490 Most messages have a default content type of text/plain, except for
491 messages that are subparts of multipart/digest containers. Such
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000492 subparts have a default content type of message/rfc822.
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000493 """
494 return self._default_type
495
496 def set_default_type(self, ctype):
497 """Set the `default' content type.
498
Barry Warsawc1068642002-07-19 22:24:55 +0000499 ctype should be either "text/plain" or "message/rfc822", although this
500 is not enforced. The default content type is not stored in the
Barry Warsawc4945492002-09-28 20:40:25 +0000501 Content-Type header.
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000502 """
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000503 self._default_type = ctype
504
Barry Warsawbeb59452001-09-26 05:41:51 +0000505 def _get_params_preserve(self, failobj, header):
506 # Like get_params() but preserves the quoting of values. BAW:
507 # should this be part of the public interface?
508 missing = []
509 value = self.get(header, missing)
510 if value is missing:
511 return failobj
512 params = []
513 for p in paramre.split(value):
514 try:
515 name, val = p.split('=', 1)
Barry Warsaw7aeac912002-07-18 23:09:09 +0000516 name = name.strip()
517 val = val.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000518 except ValueError:
519 # Must have been a bare attribute
Barry Warsaw7aeac912002-07-18 23:09:09 +0000520 name = p.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000521 val = ''
522 params.append((name, val))
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000523 params = Utils.decode_params(params)
Barry Warsawbeb59452001-09-26 05:41:51 +0000524 return params
525
Barry Warsawc4945492002-09-28 20:40:25 +0000526 def get_params(self, failobj=None, header='content-type', unquote=True):
527 """Return the message's Content-Type parameters, as a list.
Barry Warsawba925802001-09-23 03:17:28 +0000528
Barry Warsawbeb59452001-09-26 05:41:51 +0000529 The elements of the returned list are 2-tuples of key/value pairs, as
530 split on the `=' sign. The left hand side of the `=' is the key,
531 while the right hand side is the value. If there is no `=' sign in
Barry Warsaw15aefa92002-09-26 17:19:34 +0000532 the parameter the value is the empty string. The value is as
533 described in the get_param() method.
Barry Warsawbeb59452001-09-26 05:41:51 +0000534
Barry Warsawc4945492002-09-28 20:40:25 +0000535 Optional failobj is the object to return if there is no Content-Type
Barry Warsawba925802001-09-23 03:17:28 +0000536 header. Optional header is the header to search instead of
Barry Warsawc4945492002-09-28 20:40:25 +0000537 Content-Type. If unquote is True, the value is unquoted.
Barry Warsawba925802001-09-23 03:17:28 +0000538 """
539 missing = []
Barry Warsawbeb59452001-09-26 05:41:51 +0000540 params = self._get_params_preserve(missing, header)
541 if params is missing:
Barry Warsawba925802001-09-23 03:17:28 +0000542 return failobj
Barry Warsaw409a4c02002-04-10 21:01:31 +0000543 if unquote:
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000544 return [(k, _unquotevalue(v)) for k, v in params]
Barry Warsaw409a4c02002-04-10 21:01:31 +0000545 else:
546 return params
Barry Warsawba925802001-09-23 03:17:28 +0000547
Barry Warsawc4945492002-09-28 20:40:25 +0000548 def get_param(self, param, failobj=None, header='content-type',
549 unquote=True):
550 """Return the parameter value if found in the Content-Type header.
Barry Warsawba925802001-09-23 03:17:28 +0000551
Barry Warsawc4945492002-09-28 20:40:25 +0000552 Optional failobj is the object to return if there is no Content-Type
Barry Warsaw15aefa92002-09-26 17:19:34 +0000553 header, or the Content-Type header has no such parameter. Optional
Barry Warsawc4945492002-09-28 20:40:25 +0000554 header is the header to search instead of Content-Type.
Barry Warsawbeb59452001-09-26 05:41:51 +0000555
Barry Warsaw15aefa92002-09-26 17:19:34 +0000556 Parameter keys are always compared case insensitively. The return
557 value can either be a string, or a 3-tuple if the parameter was RFC
558 2231 encoded. When it's a 3-tuple, the elements of the value are of
559 the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
560 string. Your application should be prepared to deal with these, and
561 can convert the parameter to a Unicode string like so:
562
563 param = msg.get_param('foo')
564 if isinstance(param, tuple):
565 param = unicode(param[2], param[0])
566
567 In any case, the parameter value (either the returned string, or the
568 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
Barry Warsawc4945492002-09-28 20:40:25 +0000569 to False.
Barry Warsawba925802001-09-23 03:17:28 +0000570 """
Barry Warsawbeb59452001-09-26 05:41:51 +0000571 if not self.has_key(header):
Barry Warsawba925802001-09-23 03:17:28 +0000572 return failobj
Barry Warsawbeb59452001-09-26 05:41:51 +0000573 for k, v in self._get_params_preserve(failobj, header):
574 if k.lower() == param.lower():
Barry Warsaw409a4c02002-04-10 21:01:31 +0000575 if unquote:
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000576 return _unquotevalue(v)
Barry Warsaw409a4c02002-04-10 21:01:31 +0000577 else:
578 return v
Barry Warsawba925802001-09-23 03:17:28 +0000579 return failobj
580
Barry Warsawc4945492002-09-28 20:40:25 +0000581 def set_param(self, param, value, header='Content-Type', requote=True,
Barry Warsaw3c255352002-09-06 03:55:04 +0000582 charset=None, language=''):
Barry Warsawc4945492002-09-28 20:40:25 +0000583 """Set a parameter in the Content-Type header.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000584
585 If the parameter already exists in the header, its value will be
586 replaced with the new value.
587
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000588 If header is Content-Type and has not yet been defined for this
Barry Warsaw409a4c02002-04-10 21:01:31 +0000589 message, it will be set to "text/plain" and the new parameter and
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000590 value will be appended as per RFC 2045.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000591
Barry Warsawc4945492002-09-28 20:40:25 +0000592 An alternate header can specified in the header argument, and all
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000593 parameters will be quoted as necessary unless requote is False.
Barry Warsaw3c255352002-09-06 03:55:04 +0000594
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000595 If charset is specified, the parameter will be encoded according to RFC
596 2231. Optional language specifies the RFC 2231 language, defaulting
597 to the empty string. Both charset and language should be strings.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000598 """
Barry Warsaw3c255352002-09-06 03:55:04 +0000599 if not isinstance(value, TupleType) and charset:
600 value = (charset, language, value)
601
Barry Warsaw409a4c02002-04-10 21:01:31 +0000602 if not self.has_key(header) and header.lower() == 'content-type':
603 ctype = 'text/plain'
604 else:
605 ctype = self.get(header)
606 if not self.get_param(param, header=header):
607 if not ctype:
608 ctype = _formatparam(param, value, requote)
609 else:
610 ctype = SEMISPACE.join(
611 [ctype, _formatparam(param, value, requote)])
612 else:
613 ctype = ''
614 for old_param, old_value in self.get_params(header=header,
615 unquote=requote):
616 append_param = ''
617 if old_param.lower() == param.lower():
618 append_param = _formatparam(param, value, requote)
619 else:
620 append_param = _formatparam(old_param, old_value, requote)
621 if not ctype:
622 ctype = append_param
623 else:
624 ctype = SEMISPACE.join([ctype, append_param])
625 if ctype <> self.get(header):
626 del self[header]
627 self[header] = ctype
628
Barry Warsawc4945492002-09-28 20:40:25 +0000629 def del_param(self, param, header='content-type', requote=True):
Barry Warsaw409a4c02002-04-10 21:01:31 +0000630 """Remove the given parameter completely from the Content-Type header.
631
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000632 The header will be re-written in place without the parameter or its
633 value. All values will be quoted as necessary unless requote is
634 False. Optional header specifies an alternative to the Content-Type
635 header.
Barry Warsaw409a4c02002-04-10 21:01:31 +0000636 """
637 if not self.has_key(header):
638 return
639 new_ctype = ''
640 for p, v in self.get_params(header, unquote=requote):
641 if p.lower() <> param.lower():
642 if not new_ctype:
643 new_ctype = _formatparam(p, v, requote)
644 else:
645 new_ctype = SEMISPACE.join([new_ctype,
646 _formatparam(p, v, requote)])
647 if new_ctype <> self.get(header):
648 del self[header]
649 self[header] = new_ctype
650
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the maintype/subtype of a header, keeping its parameters.

    type has to contain exactly one '/' (i.e. look like
    "maintype/subtype"); anything else raises ValueError.

    When the header being set is Content-Type (compared without regard to
    case), any MIME-Version header is removed and a fresh
    "MIME-Version: 1.0" is added.

    If the message does not have the header yet, it is simply added with
    type as its value.  Otherwise the existing parameters are read via
    get_params(), the header is replaced by the bare type, and each old
    parameter after the first (which is the old type itself) is put back
    with set_param().  requote is handed to get_params() as its unquote
    argument and to set_param().
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Setting the Content-Type implies a MIME-Version header
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if self.has_key(header):
        old_params = self.get_params(header, unquote=requote)
        del self[header]
        self[header] = type
        # Item zero is the previous type; only real parameters go back.
        for name, value in old_params[1:]:
            self.set_param(name, value, header, requote)
    else:
        self[header] = type
682
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  failobj is returned when
    the header or the parameter is missing.

    An RFC 2231 encoded parameter arrives as a 3-tuple and is returned as
    a unicode string decoded with the charset held in the tuple's first
    item.  RFC 2231 allows that charset to be left empty, in which case
    us-ascii is assumed instead of handing an empty/None encoding to
    unicode().
    """
    missing = []
    filename = self.get_param('filename', missing, 'content-disposition')
    if filename is missing:
        return failobj
    if isinstance(filename, TupleType):
        # It's an RFC 2231 encoded parameter
        newvalue = _unquotevalue(filename)
        # The charset slot may be empty or None; fall back to us-ascii.
        return unicode(newvalue[2], newvalue[0] or 'us-ascii')
    return _unquotevalue(filename.strip())
Barry Warsawba925802001-09-23 03:17:28 +0000700
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is extracted from the Content-Type header's `boundary'
    parameter, and it is unquoted.  failobj is returned when the header
    or the parameter is missing.

    An RFC 2231 encoded parameter arrives as a 3-tuple; it is decoded
    with the charset held in the tuple's first item and then re-encoded
    as us-ascii.  RFC 2231 allows that charset to be left empty, in which
    case us-ascii is assumed instead of handing an empty/None encoding to
    unicode().
    """
    missing = []
    boundary = self.get_param('boundary', missing)
    if boundary is missing:
        return failobj
    if isinstance(boundary, TupleType):
        # RFC 2231 encoded, so decode.  It better end up as ascii.  The
        # charset slot may be empty or None; fall back to us-ascii.
        charset = boundary[0] or 'us-ascii'
        return unicode(boundary[2], charset).encode('us-ascii')
    return _unquotevalue(boundary.strip())
Barry Warsawba925802001-09-23 03:17:28 +0000715
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and adding a new one, this
    rewrites the header where it already sits in the header list, so the
    order of the message's headers is preserved.  An existing boundary
    parameter is replaced in position; if there is none, one is appended
    after the other parameters.  The new value is always wrapped in
    double quotes.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = []
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise Errors.HeaderParseError('No Content-Type header found')
    rebuilt = []
    replaced = False
    for name, value in old_params:
        if name.lower() == 'boundary':
            rebuilt.append(('boundary', '"%s"' % boundary))
            replaced = True
        else:
            rebuilt.append((name, value))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        rebuilt.append(('boundary', '"%s"' % boundary))
    # Render the parameter list once; items with an empty value are
    # written as a bare name.
    pieces = []
    for name, value in rebuilt:
        if value == '':
            pieces.append(name)
        else:
            pieces.append('%s=%s' % (name, value))
    new_value = SEMISPACE.join(pieces)
    # Replace the existing Content-Type header with the new value
    updated = []
    for key, current in self._headers:
        if key.lower() == 'content-type':
            updated.append((key, new_value))
        else:
            updated.append((key, current))
    self._headers = updated
760
# Bind walk() into this namespace from one of two compatibility modules.
# NOTE(review): presumably email._compat22 uses syntax that Python 2.1
# cannot compile, which is why the fallback is keyed on SyntaxError
# rather than ImportError -- confirm against those modules.
try:
    from email._compat22 import walk
except SyntaxError:
    # Must be using Python 2.1
    from email._compat21 import walk
Barry Warsawba925802001-09-23 03:17:28 +0000766
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The returned string is always coerced to lower case.  If there is no
    Content-Type header, or if that header has no charset parameter,
    failobj is returned.

    An RFC 2231 encoded parameter arrives as a 3-tuple; it is decoded
    with the charset held in the tuple's first item and then re-encoded
    as us-ascii.  RFC 2231 allows that charset to be left empty, in which
    case us-ascii is assumed instead of handing an empty/None encoding to
    unicode().
    """
    missing = []
    charset = self.get_param('charset', missing)
    if charset is missing:
        return failobj
    if isinstance(charset, TupleType):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        # The charset slot may be empty or None; fall back to us-ascii.
        pcharset = charset[0] or 'us-ascii'
        charset = unicode(charset[2], pcharset).encode('us-ascii')
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000783
def get_charsets(self, failobj=None):
    """Return a list of the charsets used in this message and its parts.

    The message is traversed with walk(), and for every part visited the
    result of that part's get_content_charset(failobj) is collected, in
    traversal order.  Each item is therefore either the part's charset
    or failobj (None by default) where get_content_charset() found
    nothing to report.

    The list holds one entry per part yielded by walk().
    """
    found = []
    for part in self.walk():
        found.append(part.get_content_charset(failobj))
    return found
Barry Warsaw15aefa92002-09-26 17:19:34 +0000800 return [part.get_content_charset(failobj) for part in self.walk()]