blob: 2245f9b8340bd81881996a609627ec8ddb1cd2b4 [file] [log] [blame]
Barry Warsaw5d840532004-05-09 03:44:55 +00001# Copyright (C) 2001-2004 Python Software Foundation
2# Author: barry@python.org (Barry Warsaw)
Barry Warsawba925802001-09-23 03:17:28 +00003
Barry Warsaw5d840532004-05-09 03:44:55 +00004"""Basic message object for the email package object model."""
Barry Warsawba925802001-09-23 03:17:28 +00005
Barry Warsawba925802001-09-23 03:17:28 +00006import re
Barry Warsaw08898492003-03-11 04:33:30 +00007import uu
Barry Warsaw21191d32003-03-10 16:13:14 +00008import binascii
Barry Warsaw409a4c02002-04-10 21:01:31 +00009import warnings
Barry Warsawba925802001-09-23 03:17:28 +000010from cStringIO import StringIO
Barry Warsawba925802001-09-23 03:17:28 +000011
Barry Warsawba925802001-09-23 03:17:28 +000012# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000013from email import Utils
Barry Warsaw21191d32003-03-10 16:13:14 +000014from email import Errors
Barry Warsaw8ba76e82002-06-02 19:05:51 +000015from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000016
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches the `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29
Barry Warsaw908dc4b2002-06-29 05:56:15 +000030# Helper functions
def _formatparam(param, value=None, quote=True):
    """Format a header parameter and return it as a string.

    When value is None or empty, the bare parameter name is returned.
    Otherwise the result is `param=value', with the value wrapped in double
    quotes when quote is true or when the value contains one of the
    tspecials characters.

    A tuple value is treated as an RFC 2231 (charset, language, value)
    triple: it is encoded with Utils.encode_rfc2231() and `*' is appended to
    the parameter name.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231.  charset is a string here, not a Charset
        # instance.
        charset, language, text = value[0], value[1], value[2]
        param += '*'
        value = Utils.encode_rfc2231(text, charset, language)
    # BAW: Please check this.  I think that if quote is set it should force
    # quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, Utils.quote(value))
    return '%s=%s' % (param, value)
Barry Warsawbeb59452001-09-26 05:41:51 +000052
def _find_param_end(s):
    """Return the index of the first `;' in s that lies outside a quoted
    string, or len(s) when there is none.

    Inside double quotes a backslash escapes the character after it, so an
    escaped quote does not end the quoted string.
    """
    in_quotes = False
    i = 0
    n = len(s)
    while i < n:
        c = s[i]
        if in_quotes and c == '\\':
            # Quoted-pair: skip the backslash and the character it escapes.
            i += 2
            continue
        if c == '"':
            in_quotes = not in_quotes
        elif c == ';' and not in_quotes:
            return i
        i += 1
    return n


def _parseparam(s):
    """Split a `;'-prefixed parameter string into a list of parameters.

    s must start with `;' (callers prepend one to the raw header value);
    parsing stops at the first segment that is not introduced by `;'.  Each
    returned item is stripped of surrounding whitespace.  For items of the
    form `name=value' the name is lower-cased and whitespace around the `='
    is removed; the value is left untouched, quotes included.

    Semicolons inside double-quoted strings do not separate parameters, and
    backslash-escaped quotes inside such strings are honoured.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = _find_param_end(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist
69
Barry Warsawba925802001-09-23 03:17:28 +000070
def _unquotevalue(value):
    """Return value with Utils.unquote() applied to its text.

    A tuple is treated as a (charset, language, value) triple: only the
    third item is unquoted and a new 3-tuple is returned.  Anything else is
    passed straight to Utils.unquote().
    """
    if not isinstance(value, tuple):
        return Utils.unquote(value)
    charset, language = value[0], value[1]
    return charset, language, Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000076
77
Barry Warsaw48b0d362002-08-27 22:34:44 +000078
Barry Warsawba925802001-09-23 03:17:28 +000079class Message:
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000080 """Basic message object.
Barry Warsawba925802001-09-23 03:17:28 +000081
82 A message object is defined as something that has a bunch of RFC 2822
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000083 headers and a payload. It may optionally have an envelope header
84 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
85 multipart or a message/rfc822), then the payload is a list of Message
86 objects, otherwise it is a string.
Barry Warsawba925802001-09-23 03:17:28 +000087
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000088 Message objects implement part of the `mapping' interface, which assumes
Barry Warsawba925802001-09-23 03:17:28 +000089 there is exactly one occurrence of the header per message. Some headers
Barry Warsawc4945492002-09-28 20:40:25 +000090 do in fact appear multiple times (e.g. Received) and for those headers,
Barry Warsawba925802001-09-23 03:17:28 +000091 you must use the explicit API to set or get all the headers. Not all of
92 the mapping methods are implemented.
Barry Warsawba925802001-09-23 03:17:28 +000093 """
def __init__(self):
    """Create an empty message: no headers, no payload, no charset."""
    # Headers are stored as a list of (name, value) tuples.
    self._headers = []
    self._payload = None
    self._charset = None
    self._unixfrom = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    # Default content type
    self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +0000103
def __str__(self):
    """Return the entire formatted message as a string.

    The result is as_string() called with unixfrom=True, so the envelope
    header is requested along with the headers and body.
    """
    include_envelope = True
    return self.as_string(unixfrom=include_envelope)
Barry Warsawba925802001-09-23 03:17:28 +0000109
def as_string(self, unixfrom=False):
    """Return the entire formatted message as a string.

    Optional unixfrom, when True, means include the Unix From_ envelope
    header.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
124
def is_multipart(self):
    """Return True if the payload is a list (i.e. multiple parts)."""
    return isinstance(self._payload, list)
Barry Warsawba925802001-09-23 03:17:28 +0000130
131 #
132 # Unix From_ line
133 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
136
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    return self._unixfrom
139
140 #
141 # Payload manipulation.
142 #
def add_payload(self, payload):
    """Add the given payload to the current payload.

    If the current payload is empty, it becomes the given value as a
    scalar.  If it is already a list, the value is appended.  Otherwise the
    existing scalar and the new value are combined into a two-item list,
    provided the main content type is "multipart" or missing; if not,
    Errors.MultipartConversionError is raised.

    Note: This method is deprecated.  Use .attach() instead.
    """
    warnings.warn('add_payload() is deprecated, use attach() instead.',
                  DeprecationWarning, 2)
    current = self._payload
    if current is None:
        self._payload = payload
        return
    if isinstance(current, list):
        current.append(payload)
        return
    if self.get_main_type() not in (None, 'multipart'):
        raise Errors.MultipartConversionError(
            'Message main content type must be "multipart" or missing')
    self._payload = [current, payload]
162
def attach(self, payload):
    """Add the given payload to the current payload.

    An empty payload becomes a one-item list; otherwise the value is
    appended to the existing payload.  If you want to set the payload to a
    scalar object, use set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
    else:
        self._payload = [payload]
Barry Warsawba925802001-09-23 03:17:28 +0000174
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; TypeError is raised if i is
    given and the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64', or one
    of the uuencode names (`x-uuencode', `uuencode', `uue', `x-uue').  If
    some other encoding is used, or the header is missing, or if the
    payload has bogus data (i.e. bogus base64 or uuencoded data), the
    payload is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, list):
        payload = self._payload[i]
    else:
        raise TypeError('Expected list, got %s' % type(self._payload))
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        try:
            return Utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        sfp = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), sfp)
        except uu.Error:
            # Some decoding problem
            return payload
        return sfp.getvalue()
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
224
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset, when not None, is passed on to set_charset(); see
    that method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
234
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, str):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if not self.has_key('Content-Type'):
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    else:
        self.set_param('charset', charset.get_output_charset())
    if not self.has_key('Content-Transfer-Encoding'):
        cte = charset.get_body_encoding()
        # The body encoding is either a callable that is handed the
        # message, or a value to store in the header.
        if callable(cte):
            cte(self)
        else:
            self.add_header('Content-Transfer-Encoding', cte)
274
def get_charset(self):
    """Return the charset stored by set_charset() for the message's
    payload, or None if none is set.
    """
    return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000279
Barry Warsawba925802001-09-23 03:17:28 +0000280 #
281 # MAPPING INTERFACE (partial)
282 #
def __len__(self):
    """Return the total number of headers, including duplicates."""
    header_count = len(self._headers)
    return header_count
286
def __getitem__(self, name):
    """Get a header value.

    Return None if the header is missing instead of raising an exception.

    Note that when the header appears multiple times only one of the
    values is returned.  Use get_all() to get all the values matching a
    header field name.
    """
    value = self.get(name)
    return value
297
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name; the new (name, val) pair is appended to the header list.  Use
    __delitem__() first to delete any existing headers.
    """
    header = (name, val)
    self._headers.append(header)
305
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Header names are compared case-insensitively.  Does not raise an
    exception if the header is missing.
    """
    name = name.lower()
    # Rebind to a new list holding only the headers that do not match.
    self._headers = [(k, v) for k, v in self._headers if k.lower() != name]
317
def __contains__(self, name):
    """Return true if a header named name exists (case-insensitive)."""
    wanted = name.lower()
    for k, v in self._headers:
        if k.lower() == wanted:
            return True
    return False
Barry Warsawba925802001-09-23 03:17:28 +0000320
def has_key(self, name):
    """Return true if the message contains the header."""
    # A fresh list serves as a unique sentinel, so a header whose stored
    # value is None still counts as present.
    sentinel = []
    found = self.get(name, sentinel)
    return found is not sentinel
Barry Warsawba925802001-09-23 03:17:28 +0000325
def keys(self):
    """Return a list of all the message's header field names.

    The names are in the order of the stored header list, i.e. the order
    they appeared in the original message or were added to the message,
    and may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    return [name for name, _value in self._headers]
335
def values(self):
    """Return a list of all the message's header values.

    The values are in the order of the stored header list, i.e. the order
    they appeared in the original message or were added to the message,
    and may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    return [value for _name, value in self._headers]
345
def items(self):
    """Get all the message's header fields and values.

    Returns a copy of the header list as (name, value) tuples, in the order
    they appeared in the original message or were added to the message;
    it may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    return list(self._headers)
355
def get(self, name, failobj=None):
    """Get a header value.

    Header names are matched case-insensitively and the value of the first
    match in the header list is returned.  Like __getitem__() but return
    failobj instead of None when the field is missing.
    """
    wanted = name.lower()
    for key, value in self._headers:
        if key.lower() == wanted:
            return value
    return failobj
367
368 #
369 # Additional useful stuff
370 #
371
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are matched case-insensitively.  The values are in the order of
    the stored header list and may contain duplicates.  Any fields deleted
    and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    if matches:
        return matches
    return failobj
389
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out
    of the header text when None).  Keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    parts = []
    if _value is not None:
        parts.append(_value)
    for k, v in _params.items():
        key = k.replace('_', '-')
        if v is None:
            parts.append(key)
        else:
            parts.append(_formatparam(key, v))
    self._headers.append((_name, SEMISPACE.join(parts)))
411
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  Names are matched case-insensitively.  If no
    matching header was found, a KeyError is raised.
    """
    _name = _name.lower()
    for i, (k, v) in enumerate(self._headers):
        if k.lower() == _name:
            # Keep the stored spelling of the field name.
            self._headers[i] = (k, _value)
            break
    else:
        raise KeyError(_name)
426
Barry Warsawc1068642002-07-19 22:24:55 +0000427 #
428 # These methods are silently deprecated in favor of get_content_type() and
429 # friends (see below). They will be noisily deprecated in email 3.0.
430 #
431
def get_type(self, failobj=None):
    """Returns the message's content type.

    The returned string is coerced to lowercase and returned as a single
    string of the form `maintype/subtype'.  If there was no Content-Type
    header in the message, failobj is returned (defaults to None).
    """
    sentinel = []
    header = self.get('content-type', sentinel)
    if header is sentinel:
        return failobj
    # Everything before the first `;' is the type; the rest is parameters.
    first_field = paramre.split(header)[0]
    return first_field.lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000444
def get_main_type(self, failobj=None):
    """Return the message's main content type if present.

    failobj is returned when get_type() finds no content type, or when the
    type does not contain exactly one `/'.
    """
    missing = []
    ctype = self.get_type(missing)
    if ctype is missing or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000454
def get_subtype(self, failobj=None):
    """Return the message's content subtype if present.

    failobj is returned when get_type() finds no content type, or when the
    type does not contain exactly one `/'.
    """
    missing = []
    ctype = self.get_type(missing)
    if ctype is missing or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[1]
464
465 #
466 # Use these three methods instead of the three above.
467 #
468
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    missing = []
    value = self.get('content-type', missing)
    if value is missing:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(value)[0].lower().strip()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    if ctype.count('/') != 1:
        return 'text/plain'
    return ctype
Barry Warsawc1068642002-07-19 22:24:55 +0000492
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
501
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
Barry Warsawba925802001-09-23 03:17:28 +0000510
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
519
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is kept on the message object only; it is
    not stored in the Content-Type header.
    """
    self._default_type = ctype
528
def _get_params_preserve(self, failobj, header):
    # Like get_params() but preserves the quoting of values.  Returns
    # failobj when the header is missing, otherwise the list of
    # (name, value) pairs after Utils.decode_params() has processed it.
    # BAW: should this be part of the public interface?
    sentinel = []
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for p in _parseparam(';' + raw):
        if '=' in p:
            name, val = p.split('=', 1)
            name = name.strip()
            val = val.strip()
        else:
            # Must have been a bare attribute
            name = p.strip()
            val = ''
        pairs.append((name, val))
    return Utils.decode_params(pairs)
549
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = []
    pairs = self._get_params_preserve(sentinel, header)
    if pairs is sentinel:
        return failobj
    if not unquote:
        return pairs
    return [(k, _unquotevalue(v)) for k, v in pairs]
Barry Warsawba925802001-09-23 03:17:28 +0000571
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return
    values, and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if not self.has_key(header):
        return failobj
    for k, v in self._get_params_preserve(failobj, header):
        if k.lower() != param.lower():
            continue
        # First matching key wins.
        if unquote:
            return _unquotevalue(v)
        return v
    return failobj
607
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  This includes a parameter whose current
    value is empty.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When the resulting header text differs from the current one, the header
    is deleted and set again.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if not self.has_key(header) and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # Use a sentinel rather than truthiness so that an existing parameter
    # with an empty value is replaced instead of being added a second time.
    missing = []
    if self.get_param(param, missing, header) is missing:
        if not ctype:
            ctype = _formatparam(param, value, requote)
        else:
            ctype = SEMISPACE.join(
                [ctype, _formatparam(param, value, requote)])
    else:
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            if not ctype:
                ctype = append_param
            else:
                ctype = SEMISPACE.join([ctype, append_param])
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
655
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt without the parameter or its value; when the
    rebuilt text differs from the current one, the header is deleted and
    set again.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.  Nothing happens if the header is missing.
    """
    if not self.has_key(header):
        return
    new_ctype = ''
    for p, v in self.get_params(header=header, unquote=requote):
        if p.lower() == param.lower():
            # This is the parameter being removed.
            continue
        formatted = _formatparam(p, v, requote)
        if not new_ctype:
            new_ctype = formatted
        else:
            new_ctype = SEMISPACE.join([new_ctype, formatted])
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
677
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the maintype/subtype of the Content-Type header.

    type has to be a string containing exactly one slash, i.e. of the
    form "maintype/subtype"; anything else raises ValueError.

    Any parameters already present on the header are kept: they are read
    with get_params(), the header is replaced by the bare type, and each
    parameter is then put back with set_param().  requote is passed along
    to both calls; when it is False the existing quoting is left as is,
    otherwise the parameters are quoted (the default).

    A different header can be named with the header argument.  Whenever
    the header being set is Content-Type, the MIME-Version header is also
    reset to "1.0".
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if self.has_key(header):
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The first entry is the old type itself, so start after it.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        # No such header yet, so there are no parameters to carry over.
        self[header] = type
709
def get_filename(self, failobj=None):
    """Return the payload's filename, or failobj if there is none.

    The value comes from the `filename' parameter of the
    Content-Disposition header.  A plain string value is stripped of
    surrounding whitespace and unquoted.  An RFC 2231 encoded value (a
    tuple) is unquoted and then decoded to a unicode string using its
    declared charset, with us-ascii assumed when no charset is given.
    """
    sentinel = []
    raw = self.get_param('filename', sentinel, 'content-disposition')
    if raw is sentinel:
        return failobj
    if not isinstance(raw, tuple):
        return _unquotevalue(raw.strip())
    # It's an RFC 2231 encoded parameter
    decoded = _unquotevalue(raw)
    return unicode(decoded[2], decoded[0] or 'us-ascii')
Barry Warsawba925802001-09-23 03:17:28 +0000727
728 def get_boundary(self, failobj=None):
729 """Return the boundary associated with the payload if present.
730
Barry Warsawc4945492002-09-28 20:40:25 +0000731 The boundary is extracted from the Content-Type header's `boundary'
Barry Warsawba925802001-09-23 03:17:28 +0000732 parameter, and it is unquoted.
733 """
734 missing = []
735 boundary = self.get_param('boundary', missing)
736 if boundary is missing:
737 return failobj
Barry Warsaw5d840532004-05-09 03:44:55 +0000738 if isinstance(boundary, tuple):
Barry Warsaw15aefa92002-09-26 17:19:34 +0000739 # RFC 2231 encoded, so decode. It better end up as ascii
Barry Warsaw62083692003-08-19 03:53:02 +0000740 charset = boundary[0] or 'us-ascii'
741 return unicode(boundary[2], charset).encode('us-ascii')
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000742 return _unquotevalue(boundary.strip())
Barry Warsawba925802001-09-23 03:17:28 +0000743
744 def set_boundary(self, boundary):
Barry Warsawc4945492002-09-28 20:40:25 +0000745 """Set the boundary parameter in Content-Type to 'boundary'.
Barry Warsawba925802001-09-23 03:17:28 +0000746
Barry Warsawc4945492002-09-28 20:40:25 +0000747 This is subtly different than deleting the Content-Type header and
Barry Warsawba925802001-09-23 03:17:28 +0000748 adding a new one with a new boundary parameter via add_header(). The
749 main difference is that using the set_boundary() method preserves the
Barry Warsawc4945492002-09-28 20:40:25 +0000750 order of the Content-Type header in the original message.
Barry Warsawba925802001-09-23 03:17:28 +0000751
Barry Warsawc4945492002-09-28 20:40:25 +0000752 HeaderParseError is raised if the message has no Content-Type header.
Barry Warsawba925802001-09-23 03:17:28 +0000753 """
Barry Warsawbeb59452001-09-26 05:41:51 +0000754 missing = []
755 params = self._get_params_preserve(missing, 'content-type')
756 if params is missing:
Barry Warsawc4945492002-09-28 20:40:25 +0000757 # There was no Content-Type header, and we don't know what type
Barry Warsawba925802001-09-23 03:17:28 +0000758 # to set it to, so raise an exception.
Barry Warsawc4945492002-09-28 20:40:25 +0000759 raise Errors.HeaderParseError, 'No Content-Type header found'
Barry Warsawba925802001-09-23 03:17:28 +0000760 newparams = []
Barry Warsawc4945492002-09-28 20:40:25 +0000761 foundp = False
Barry Warsawbeb59452001-09-26 05:41:51 +0000762 for pk, pv in params:
763 if pk.lower() == 'boundary':
764 newparams.append(('boundary', '"%s"' % boundary))
Barry Warsawc4945492002-09-28 20:40:25 +0000765 foundp = True
Barry Warsawba925802001-09-23 03:17:28 +0000766 else:
Barry Warsawbeb59452001-09-26 05:41:51 +0000767 newparams.append((pk, pv))
Barry Warsawba925802001-09-23 03:17:28 +0000768 if not foundp:
Barry Warsawc4945492002-09-28 20:40:25 +0000769 # The original Content-Type header had no boundary attribute.
Walter Dörwaldf0dfc7a2003-10-20 14:01:56 +0000770 # Tack one on the end. BAW: should we raise an exception
Barry Warsawba925802001-09-23 03:17:28 +0000771 # instead???
Barry Warsawbeb59452001-09-26 05:41:51 +0000772 newparams.append(('boundary', '"%s"' % boundary))
Barry Warsawc4945492002-09-28 20:40:25 +0000773 # Replace the existing Content-Type header with the new value
Barry Warsawba925802001-09-23 03:17:28 +0000774 newheaders = []
775 for h, v in self._headers:
776 if h.lower() == 'content-type':
Barry Warsawbeb59452001-09-26 05:41:51 +0000777 parts = []
778 for k, v in newparams:
779 if v == '':
780 parts.append(k)
781 else:
782 parts.append('%s=%s' % (k, v))
783 newheaders.append((h, SEMISPACE.join(parts)))
784
Barry Warsawba925802001-09-23 03:17:28 +0000785 else:
786 newheaders.append((h, v))
787 self._headers = newheaders
788
def get_content_charset(self, failobj=None):
    """Return the Content-Type header's charset parameter, lower cased.

    failobj is returned when there is no Content-Type header or when that
    header carries no charset parameter.  An RFC 2231 encoded value (a
    tuple) is first decoded using its declared charset, with us-ascii
    assumed when no charset is given, and re-encoded as us-ascii.
    """
    sentinel = []
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        source_charset = value[0] or 'us-ascii'
        value = unicode(value[2], source_charset).encode('us-ascii')
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000806
def get_charsets(self, failobj=None):
    """Return a list of the charsets used throughout this message.

    Every part produced by walk() contributes one item, in walk order:
    whatever that part's get_content_charset(failobj) returns, which is
    either its charset string or failobj (None by default).
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
Barry Warsaw5d840532004-05-09 03:44:55 +0000824
825 # I.e. def walk(self): ...
826 from email.Iterators import walk