blob: b466f396d92a03ae380e62a6371447cf425c59b9 [file] [log] [blame]
Barry Warsaw5d840532004-05-09 03:44:55 +00001# Copyright (C) 2001-2004 Python Software Foundation
Barry Warsawbb113862004-10-03 03:16:19 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw5d840532004-05-09 03:44:55 +00005"""Basic message object for the email package object model."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw08898492003-03-11 04:33:30 +00008import uu
Barry Warsaw21191d32003-03-10 16:13:14 +00009import binascii
Barry Warsaw409a4c02002-04-10 21:01:31 +000010import warnings
Barry Warsawba925802001-09-23 03:17:28 +000011from cStringIO import StringIO
Barry Warsawba925802001-09-23 03:17:28 +000012
Barry Warsawba925802001-09-23 03:17:28 +000013# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000014from email import Utils
Barry Warsaw21191d32003-03-10 16:13:14 +000015from email import Errors
Barry Warsaw8ba76e82002-06-02 19:05:51 +000016from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000017
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Pattern used to split a header value into its parameters.  BAW: this may be
# too simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it
# catches most headers found in the wild.  A full-fledged parser may
# eventually be needed.
paramre = re.compile(r'\s*;\s*')
# Pattern matching the `special' characters in parameter values; the
# existence of any of them forces the parameter value to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
29
30
Barry Warsaw908dc4b2002-06-29 05:56:15 +000031# Helper functions
def _formatparam(param, value=None, quote=True):
    """Format one header parameter and return it as a string.

    When value is None or empty, only the bare parameter name is returned.
    Otherwise the result is `param=value', with the value wrapped in double
    quotes when quote is true or when the value contains a tspecials
    character.

    A tuple value is treated as an RFC 2231 (charset, language, value)
    triple, where charset is a string and not a Charset instance: the
    parameter name gets a trailing `*' and the value is encoded with
    Utils.encode_rfc2231().
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231
        param += '*'
        value = Utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should force
    # quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
Barry Warsawbeb59452001-09-26 05:41:51 +000053
Barry Warsawa74e8682003-09-03 04:08:13 +000054def _parseparam(s):
55 plist = []
56 while s[:1] == ';':
57 s = s[1:]
58 end = s.find(';')
59 while end > 0 and s.count('"', 0, end) % 2:
60 end = s.find(';', end + 1)
61 if end < 0:
62 end = len(s)
63 f = s[:end]
64 if '=' in f:
65 i = f.index('=')
66 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
67 plist.append(f.strip())
68 s = s[end:]
69 return plist
70
Barry Warsawba925802001-09-23 03:17:28 +000071
def _unquotevalue(value):
    """Return value with its quoting removed by Utils.unquote().

    For a tuple (the RFC 2231 form), a new 3-tuple is returned in which only
    the third item is unquoted.
    """
    # This is different than Utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return Utils.unquote(value)
    charset = value[0]
    language = value[1]
    return charset, language, Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000081
82
Barry Warsaw48b0d362002-08-27 22:34:44 +000083
Barry Warsawba925802001-09-23 03:17:28 +000084class Message:
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000085 """Basic message object.
Barry Warsawba925802001-09-23 03:17:28 +000086
87 A message object is defined as something that has a bunch of RFC 2822
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000088 headers and a payload. It may optionally have an envelope header
89 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
90 multipart or a message/rfc822), then the payload is a list of Message
91 objects, otherwise it is a string.
Barry Warsawba925802001-09-23 03:17:28 +000092
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000093 Message objects implement part of the `mapping' interface, which assumes
Barry Warsawba925802001-09-23 03:17:28 +000094 there is exactly one occurrance of the header per message. Some headers
Barry Warsawc4945492002-09-28 20:40:25 +000095 do in fact appear multiple times (e.g. Received) and for those headers,
Barry Warsawba925802001-09-23 03:17:28 +000096 you must use the explicit API to set or get all the headers. Not all of
97 the mapping methods are implemented.
Barry Warsawba925802001-09-23 03:17:28 +000098 """
99 def __init__(self):
100 self._headers = []
101 self._unixfrom = None
102 self._payload = None
Barry Warsaw409a4c02002-04-10 21:01:31 +0000103 self._charset = None
Barry Warsawba925802001-09-23 03:17:28 +0000104 # Defaults for multipart messages
105 self.preamble = self.epilogue = None
Barry Warsawbb113862004-10-03 03:16:19 +0000106 self.defects = []
Barry Warsawa0c8b9d2002-07-09 02:46:12 +0000107 # Default content type
108 self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +0000109
110 def __str__(self):
111 """Return the entire formatted message as a string.
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000112 This includes the headers, body, and envelope header.
Barry Warsawba925802001-09-23 03:17:28 +0000113 """
Barry Warsawc4945492002-09-28 20:40:25 +0000114 return self.as_string(unixfrom=True)
Barry Warsawba925802001-09-23 03:17:28 +0000115
def as_string(self, unixfrom=False):
    """Return the whole formatted message as a string.

    Optional `unixfrom' is passed through to the Generator; when True, it
    means include the Unix From_ envelope header.

    This is a convenience method and may not generate the message exactly
    as you intend because by default it mangles lines that begin with
    "From ".  For more flexibility, use the flatten() method of a Generator
    instance.
    """
    # Imported here rather than at module level.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
131
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
Barry Warsawba925802001-09-23 03:17:28 +0000135
136 #
137 # Unix From_ line
138 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ line."""
    self._unixfrom = unixfrom
141
def get_unixfrom(self):
    """Return the stored Unix From_ line (None if it was never set)."""
    return self._unixfrom
144
145 #
146 # Payload manipulation.
147 #
def attach(self, payload):
    """Append payload to the message's current payload.

    When there is no payload yet, the payload becomes a one-element list
    holding the given object; otherwise the object is appended to the
    existing payload.  To set the payload to a scalar object, use
    set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
    else:
        self._payload = [payload]
Barry Warsawba925802001-09-23 03:17:28 +0000159
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload, or to one item of it.

    The payload will either be a list object or a string.  If you mutate the
    list object, you modify the message's payload in place.  Optional i
    returns that index into the payload; a TypeError is raised when i is
    given but the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded according to the Content-Transfer-Encoding header (default is
    False).  When True and the message is a multipart, None is returned.
    Otherwise the payload is decoded when the header's value is
    `quoted-printable', `base64', or one of the uuencode names.  If some
    other encoding is used, or the header is missing, or the base64 or
    uuencoded data is bogus, the payload is returned as-is.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, list):
        payload = self._payload[i]
    else:
        raise TypeError('Expected list, got %s' % type(self._payload))
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        try:
            return Utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        decoded = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), decoded)
            return decoded.getvalue()
        except uu.Error:
            # Some decoding problem
            return payload
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
209
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    When charset is not None it is handed to set_charset(); see that method
    for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
219
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.  If
    charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    MIME-Version, Content-Type and Content-Transfer-Encoding headers are
    added when they are not already present; an existing Content-Type only
    has its charset parameter set.  The Content-Transfer-Encoding comes from
    the charset's get_body_encoding(): it is first called with the message,
    and when that call raises TypeError it is added as the header value.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, str):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    # BAW: should we accept strings that can serve as arguments to the
    # Charset constructor?
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if self.has_key('Content-Transfer-Encoding'):
        return
    cte = charset.get_body_encoding()
    try:
        cte(self)
    except TypeError:
        self.add_header('Content-Transfer-Encoding', cte)
259
def get_charset(self):
    """Return the charset stored by set_charset(), or None if unset."""
    return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000264
Barry Warsawba925802001-09-23 03:17:28 +0000265 #
266 # MAPPING INTERFACE (partial)
267 #
268 def __len__(self):
Barry Warsawbeb59452001-09-26 05:41:51 +0000269 """Return the total number of headers, including duplicates."""
Barry Warsawba925802001-09-23 03:17:28 +0000270 return len(self._headers)
271
272 def __getitem__(self, name):
273 """Get a header value.
274
275 Return None if the header is missing instead of raising an exception.
276
277 Note that if the header appeared multiple times, exactly which
Barry Warsawbb113862004-10-03 03:16:19 +0000278 occurrance gets returned is undefined. Use get_all() to get all
Barry Warsawba925802001-09-23 03:17:28 +0000279 the values matching a header field name.
280 """
281 return self.get(name)
282
283 def __setitem__(self, name, val):
284 """Set the value of a header.
285
286 Note: this does not overwrite an existing header with the same field
287 name. Use __delitem__() first to delete any existing headers.
288 """
289 self._headers.append((name, val))
290
291 def __delitem__(self, name):
292 """Delete all occurrences of a header, if present.
293
294 Does not raise an exception if the header is missing.
295 """
296 name = name.lower()
297 newheaders = []
298 for k, v in self._headers:
299 if k.lower() <> name:
300 newheaders.append((k, v))
301 self._headers = newheaders
302
Barry Warsaw42d1d3e2002-09-30 18:17:35 +0000303 def __contains__(self, name):
304 return name.lower() in [k.lower() for k, v in self._headers]
Barry Warsawba925802001-09-23 03:17:28 +0000305
def has_key(self, name):
    """Return true if the message contains the header."""
    # A fresh object can never be a stored header value, so getting it back
    # means the header is absent.
    sentinel = object()
    found = self.get(name, sentinel)
    return found is not sentinel
Barry Warsawba925802001-09-23 03:17:28 +0000310
def keys(self):
    """Return a new list of the message's header field names.

    The names come out in the order the headers are stored, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    names = []
    for k, v in self._headers:
        names.append(k)
    return names
320
def values(self):
    """Return a new list of the message's header values.

    The values come out in the order the headers are stored, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    vals = []
    for k, v in self._headers:
        vals.append(v)
    return vals
330
def items(self):
    """Return a copy of the list of (field name, value) header pairs.

    The pairs come out in the order the headers are stored, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    return list(self._headers)
340
def get(self, name, failobj=None):
    """Return the value of the first header matching name.

    Field names are compared case insensitively.  Like __getitem__() but
    failobj is returned instead of None when the field is missing.
    """
    wanted = name.lower()
    for key, value in self._headers:
        if key.lower() != wanted:
            continue
        return value
    return failobj
352
353 #
354 # Additional useful stuff
355 #
356
def get_all(self, name, failobj=None):
    """Return a list of the values of every header matching name.

    Field names are compared case insensitively.  The values come out in
    the order the headers are stored, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    if matches:
        return matches
    return failobj
374
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out of
    the result when None).  Keyword arguments can be used to set additional
    parameters for the header field, with underscores converted to dashes.
    A parameter whose value is None is added as a bare key; any other value
    is formatted by _formatparam().  The pieces are joined with `; ' and the
    header is appended to the message.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    parts = []
    if _value is not None:
        parts.append(_value)
    for key, val in _params.items():
        attr = key.replace('_', '-')
        if val is None:
            parts.append(attr)
        else:
            parts.append(_formatparam(attr, val))
    self._headers.append((_name, SEMISPACE.join(parts)))
396
def replace_header(self, _name, _value):
    """Replace the value of the first header matching _name.

    The header keeps its position in the header list and the case of its
    existing field name.  Names are compared case insensitively.  If no
    matching header was found, a KeyError carrying the lower-cased name is
    raised.
    """
    wanted = _name.lower()
    for index, (key, old) in enumerate(self._headers):
        if key.lower() == wanted:
            self._headers[index] = (key, _value)
            return
    raise KeyError(wanted)
Barry Warsaw229727f2002-09-06 03:38:12 +0000411
Barry Warsawc1068642002-07-19 22:24:55 +0000412 #
Barry Warsawbb113862004-10-03 03:16:19 +0000413 # Deprecated methods. These will be removed in email 3.1.
Barry Warsawc1068642002-07-19 22:24:55 +0000414 #
415
def get_type(self, failobj=None):
    """Return the message's content type (deprecated).

    A DeprecationWarning pointing at get_content_type() is issued.  The
    result is the part of the Content-Type header before the first
    parameter, lower-cased and stripped, i.e. normally of the form
    `maintype/subtype'.  If there was no Content-Type header in the message,
    failobj is returned (defaults to None).
    """
    warnings.warn('get_type() deprecated; use get_content_type()',
                  DeprecationWarning, 2)
    sentinel = object()
    value = self.get('content-type', sentinel)
    if value is sentinel:
        return failobj
    first = paramre.split(value)[0]
    return first.lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000430
def get_main_type(self, failobj=None):
    """Return the message's main content type if present (deprecated).

    A DeprecationWarning pointing at get_content_maintype() is issued.
    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one `/'.
    """
    warnings.warn('get_main_type() deprecated; use get_content_maintype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000442
def get_subtype(self, failobj=None):
    """Return the message's content subtype if present (deprecated).

    A DeprecationWarning pointing at get_content_subtype() is issued.
    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one `/'.
    """
    warnings.warn('get_subtype() deprecated; use get_content_subtype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[1]
454
455 #
456 # Use these three methods instead of the three above.
457 #
458
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the message,
    the default type as given by get_default_type() will be returned.  If
    the header's type does not contain exactly one `/', text/plain is
    returned.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    value = self.get('content-type', sentinel)
    if value is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(value)[0].lower().strip()
    if ctype.count('/') == 1:
        return ctype
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return 'text/plain'
Barry Warsawc1068642002-07-19 22:24:55 +0000482
def get_content_maintype(self):
    """Return the message's main content type.

    This is the part before the `/' in the string returned by
    get_content_type().
    """
    maintype = self.get_content_type().split('/')[0]
    return maintype
491
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the part after the `/' in the string returned by
    get_content_type().
    """
    subtype = self.get_content_type().split('/')[1]
    return subtype
Barry Warsawba925802001-09-23 03:17:28 +0000500
def get_default_type(self):
    """Return the stored `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
509
def set_default_type(self, ctype):
    """Store ctype as the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    self._default_type = ctype
518
Barry Warsawbeb59452001-09-26 05:41:51 +0000519 def _get_params_preserve(self, failobj, header):
520 # Like get_params() but preserves the quoting of values. BAW:
521 # should this be part of the public interface?
Barry Warsawbb113862004-10-03 03:16:19 +0000522 missing = object()
Barry Warsawbeb59452001-09-26 05:41:51 +0000523 value = self.get(header, missing)
524 if value is missing:
525 return failobj
526 params = []
Barry Warsawa74e8682003-09-03 04:08:13 +0000527 for p in _parseparam(';' + value):
Barry Warsawbeb59452001-09-26 05:41:51 +0000528 try:
529 name, val = p.split('=', 1)
Barry Warsaw7aeac912002-07-18 23:09:09 +0000530 name = name.strip()
531 val = val.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000532 except ValueError:
533 # Must have been a bare attribute
Barry Warsaw7aeac912002-07-18 23:09:09 +0000534 name = p.strip()
Barry Warsawbeb59452001-09-26 05:41:51 +0000535 val = ''
536 params.append((name, val))
Barry Warsaw908dc4b2002-06-29 05:56:15 +0000537 params = Utils.decode_params(params)
Barry Warsawbeb59452001-09-26 05:41:51 +0000538 return params
539
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key, while
    the right hand side is the value.  If there is no `=' sign in the
    parameter the value is the empty string.  The value is as described in
    the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, each value is passed through
    _unquotevalue(); otherwise the values are returned as parsed.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(k, _unquotevalue(v)) for k, v in params]
Barry Warsawba925802001-09-23 03:17:28 +0000561
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of the
    form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and LANGUAGE
    can be None, in which case you should consider VALUE to be encoded in
    the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return values,
    and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set to
    False.
    """
    if not self.has_key(header):
        return failobj
    for k, v in self._get_params_preserve(failobj, header):
        if k.lower() != param.lower():
            continue
        if unquote:
            return _unquotevalue(v)
        return v
    return failobj
597
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this message,
    it will be set to "text/plain" and the new parameter and value will be
    appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting to
    the empty string.  Both charset and language should be strings.

    When the resulting header text differs from the current one, the header
    is deleted and added again.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if not self.has_key(header) and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    if self.get_param(param, header=header):
        # The parameter is already there: rebuild the whole header text,
        # substituting the new value for the matching parameter.
        rebuilt = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_param, old_value, requote)
            if rebuilt:
                rebuilt = SEMISPACE.join([rebuilt, piece])
            else:
                rebuilt = piece
        ctype = rebuilt
    else:
        # Not there yet: append it to whatever the header holds now.
        formatted = _formatparam(param, value, requote)
        if ctype:
            ctype = SEMISPACE.join([ctype, formatted])
        else:
            ctype = formatted
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
645
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header value is rebuilt from every parameter whose name does not
    match param (compared case-insensitively).  If the rebuilt value
    differs from the current one, the header is deleted and set again with
    the new value.  requote is used both as the unquote flag when reading
    the existing parameters and as the quote flag when formatting them.
    Optional header specifies an alternative to the Content-Type header.
    Nothing happens when the header is not present.
    """
    if not self.has_key(header):
        return
    unwanted = param.lower()
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == unwanted:
            continue
        piece = _formatparam(name, val, requote)
        if rebuilt:
            rebuilt = SEMISPACE.join([rebuilt, piece])
        else:
            rebuilt = piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
667
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string containing exactly one "/" (i.e. of the form
    "maintype/subtype"), otherwise a ValueError is raised.

    The header is replaced with the new type and each of its previous
    parameters is then re-applied through set_param().  requote is handed
    on when reading the old parameters and when setting them again; when
    it is False the existing quoting is left as is.

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, the MIME-Version header is always
    reset to "1.0" as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if self.has_key(header):
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The first entry is the old type itself, so skip it.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
699
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The value comes from the `filename' parameter of the
    Content-Disposition header.  It is collapsed with
    Utils.collapse_rfc2231_value() and stripped of surrounding whitespace.
    failobj is returned when the parameter cannot be found.
    """
    # A private sentinel distinguishes "not found" from any real value.
    sentinel = object()
    raw = self.get_param('filename', sentinel, 'content-disposition')
    if raw is not sentinel:
        return Utils.collapse_rfc2231_value(raw).strip()
    return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000711
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The value comes from the `boundary' parameter looked up through
    get_param() with its default header.  It is collapsed with
    Utils.collapse_rfc2231_value() and stripped of trailing whitespace
    only.  failobj is returned when the parameter cannot be found.
    """
    # A private sentinel distinguishes "not found" from any real value.
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return Utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000724
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and adding a new one via
    add_header(), this rewrites the value of the existing Content-Type
    entry where it sits in the header list, so the order of the headers in
    the original message is preserved.  The new boundary is always wrapped
    in double quotes; all other parameters are written back as they were
    read.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise Errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    replaced = False
    updated = []
    for name, val in old_params:
        if name.lower() == 'boundary':
            name, val = 'boundary', quoted
            replaced = True
        updated.append((name, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Render the parameter list once; bare attributes (empty value) are
    # written without an "=".
    pieces = []
    for name, val in updated:
        if val == '':
            pieces.append(name)
        else:
            pieces.append('%s=%s' % (name, val))
    new_value = SEMISPACE.join(pieces)
    # Replace the existing Content-Type header with the new value
    rebuilt = []
    for hname, hval in self._headers:
        if hname.lower() == 'content-type':
            hval = new_value
        rebuilt.append((hname, hval))
    self._headers = rebuilt
769
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The returned string is always coerced to lower case.  If there is no
    Content-Type header, or if that header has no charset parameter,
    failobj is returned.

    An RFC 2231 encoded value (a tuple) is decoded using the charset it
    declares, defaulting to us-ascii, and re-encoded as us-ascii.  If that
    charset is unknown to Python, or the text cannot be converted, the raw
    undecoded value is used instead of letting the codec error propagate.
    """
    missing = object()
    charset = self.get_param('charset', missing)
    if charset is missing:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        pcharset = charset[0] or 'us-ascii'
        try:
            # LookupError is raised if the charset isn't known to Python;
            # UnicodeError if the text does not fit the declared charset
            # or cannot be represented in us-ascii.
            charset = unicode(charset[2], pcharset).encode('us-ascii')
        except (LookupError, UnicodeError):
            # Header data is untrusted; fall back to the raw value rather
            # than failing a simple getter.
            charset = charset[2]
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000787
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The list holds one entry for every part produced by self.walk(), in
    walk order.  Each entry is whatever that part's get_content_charset()
    returns when called with failobj: either the part's charset string or
    failobj (defaults to None) when no charset is available for it.
    """
    charsets = []
    for subpart in self.walk():
        charsets.append(subpart.get_content_charset(failobj))
    return charsets
Barry Warsaw5d840532004-05-09 03:44:55 +0000805
806 # I.e. def walk(self): ...
807 from email.Iterators import walk