blob: d23a26f8c87a128a5b1ef888ea4615564e42073f [file] [log] [blame]
Barry Warsaw5d840532004-05-09 03:44:55 +00001# Copyright (C) 2001-2004 Python Software Foundation
Barry Warsawbb113862004-10-03 03:16:19 +00002# Author: Barry Warsaw
3# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw5d840532004-05-09 03:44:55 +00005"""Basic message object for the email package object model."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Barry Warsawba925802001-09-23 03:17:28 +00007import re
Barry Warsaw08898492003-03-11 04:33:30 +00008import uu
Barry Warsaw21191d32003-03-10 16:13:14 +00009import binascii
Barry Warsaw409a4c02002-04-10 21:01:31 +000010import warnings
Barry Warsawba925802001-09-23 03:17:28 +000011from cStringIO import StringIO
Barry Warsawba925802001-09-23 03:17:28 +000012
Barry Warsawba925802001-09-23 03:17:28 +000013# Intrapackage imports
Barry Warsaw8ba76e82002-06-02 19:05:51 +000014from email import Utils
Barry Warsaw21191d32003-03-10 16:13:14 +000015from email import Errors
Barry Warsaw8ba76e82002-06-02 19:05:51 +000016from email import Charset
Barry Warsawba925802001-09-23 03:17:28 +000017
Barry Warsawbeb59452001-09-26 05:41:51 +000018SEMISPACE = '; '
Barry Warsaw409a4c02002-04-10 21:01:31 +000019
20# Regular expression used to split header parameters. BAW: this may be too
21# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
22# most headers found in the wild. We may eventually need a full-fledged
23# parser.
Barry Warsaw2539cf52001-10-25 22:43:46 +000024paramre = re.compile(r'\s*;\s*')
Barry Warsaw409a4c02002-04-10 21:01:31 +000025# Regular expression that matches `special' characters in parameters, the
26# existence of which forces quoting of the parameter value.
27tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28
29
30
Barry Warsaw908dc4b2002-06-29 05:56:15 +000031# Helper functions
def _formatparam(param, value=None, quote=True):
    """Render a header parameter as `key=value', or as a bare `key'.

    When value is None or has zero length, only the parameter name is
    returned.  A tuple value is treated as an RFC 2231 triple of the form
    (charset, language, value), where charset is a string and not a Charset
    instance; it is encoded and `*' is appended to the parameter name.

    The value is wrapped in double quotes if quote is true or if it
    contains any character matched by tspecials.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231
        param += '*'
        value = Utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    must_quote = quote or tspecials.search(value)
    if must_quote:
        return '%s="%s"' % (param, Utils.quote(value))
    return '%s=%s' % (param, value)
Barry Warsawbeb59452001-09-26 05:41:51 +000053
def _parseparam(s):
    """Split a parameter string into a list of `key=value' items.

    s must begin with `;' for anything to be parsed; each `;' introduces one
    item.  For every item the text is stripped, and if it contains `=' the
    part before the first `=' is lower-cased while the part after it is
    kept verbatim (quotes included).  Items without `=' are returned as-is
    after stripping.

    A `;' that appears inside a double-quoted string does not terminate the
    item.  Backslash-escaped quotes (\\") inside such a string are not
    treated as string delimiters.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # We are inside a quoted string while the number of real quote
        # characters seen so far is odd.  Escaped quotes are part of the
        # string's content, so they are subtracted from the count.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist
70
Barry Warsawba925802001-09-23 03:17:28 +000071
def _unquotevalue(value):
    # Unlike Utils.collapse_rfc2231_value(), no attempt is made to convert
    # the value to a unicode.  Message.get_param() and Message.get_params()
    # are both currently defined to hand back the tuple when the parameter
    # was RFC 2231 encoded, so only the third item of a tuple is unquoted
    # and the first two are passed through untouched.
    if not isinstance(value, tuple):
        return Utils.unquote(value)
    return value[0], value[1], Utils.unquote(value[2])
Barry Warsaw908dc4b2002-06-29 05:56:15 +000081
82
Barry Warsaw48b0d362002-08-27 22:34:44 +000083
Barry Warsawba925802001-09-23 03:17:28 +000084class Message:
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000085 """Basic message object.
Barry Warsawba925802001-09-23 03:17:28 +000086
87 A message object is defined as something that has a bunch of RFC 2822
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000088 headers and a payload. It may optionally have an envelope header
89 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
90 multipart or a message/rfc822), then the payload is a list of Message
91 objects, otherwise it is a string.
Barry Warsawba925802001-09-23 03:17:28 +000092
Barry Warsaw42d1d3e2002-09-30 18:17:35 +000093 Message objects implement part of the `mapping' interface, which assumes
Barry Warsawba925802001-09-23 03:17:28 +000094 there is exactly one occurrence of the header per message. Some headers
Barry Warsawc4945492002-09-28 20:40:25 +000095 do in fact appear multiple times (e.g. Received) and for those headers,
Barry Warsawba925802001-09-23 03:17:28 +000096 you must use the explicit API to set or get all the headers. Not all of
97 the mapping methods are implemented.
Barry Warsawba925802001-09-23 03:17:28 +000098 """
def __init__(self):
    """Create an empty message with no headers, payload or charset."""
    # Headers are stored in order as a list of (name, value) pairs.
    self._headers = []
    self._payload = None
    self._charset = None
    self._unixfrom = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
Barry Warsawba925802001-09-23 03:17:28 +0000109
def __str__(self):
    """Return the whole message flattened to a string.

    This is as_string() with the Unix From_ envelope header requested, so
    the result covers the envelope header, the headers and the body.
    """
    return self.as_string(unixfrom=True)
Barry Warsawba925802001-09-23 03:17:28 +0000115
def as_string(self, unixfrom=False):
    """Return the whole message flattened to a string.

    When optional `unixfrom' is True, the Unix From_ envelope header is
    included in the output.

    This is only a convenience wrapper around a default Generator writing
    to an in-memory buffer and may not produce the message exactly as you
    intend.  For more control, create a Generator instance yourself and
    call its flatten() method.
    """
    # Imported here rather than at module level, as in the original code.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
130
def is_multipart(self):
    """Return True if the payload is a list (of sub-parts), else False."""
    payload = self._payload
    return isinstance(payload, list)
Barry Warsawba925802001-09-23 03:17:28 +0000134
135 #
136 # Unix From_ line
137 #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
140
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    return self._unixfrom
143
144 #
145 # Payload manipulation.
146 #
def attach(self, payload):
    """Append the given payload to the current payload.

    If there is no payload yet, it becomes a one-element list holding the
    given object; otherwise the object is appended to the existing payload.
    To set the payload to a scalar object, use set_payload() instead.

    Note that append() is called on whatever the current payload is, so a
    payload previously set to a non-list object (e.g. a string) makes this
    call fail.
    """
    if self._payload is not None:
        self._payload.append(payload)
    else:
        self._payload = [payload]
Barry Warsawba925802001-09-23 03:17:28 +0000158
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload is either a list object or a string.  The list is returned
    by reference, so mutating it modifies the message's payload in place.
    With optional i, that index into the payload list is returned instead;
    a TypeError is raised if i is given and the payload is not a list.

    Optional decode is a flag (default False) saying whether the payload
    should be decoded according to the Content-Transfer-Encoding header.

    When decode is True and the message is not a multipart, the payload is
    decoded if the header's value is `quoted-printable', `base64', or one
    of the uuencode names (`x-uuencode', `uuencode', `uue', `x-uue').  If
    some other encoding is used, the header is missing, or the payload has
    bogus data (i.e. bogus base64 or uuencoded data), the payload is
    returned as-is.

    When decode is True and the message is a multipart, None is returned.
    """
    if i is not None:
        if not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload[i]
    else:
        payload = self._payload
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        try:
            return Utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        decoded = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), decoded)
        except uu.Error:
            # Some decoding problem
            return payload
        return decoded.getvalue()
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
208
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    If optional charset is not None it is passed on to set_charset(); see
    that method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
218
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  A string is converted to a Charset instance.  If charset is
    None, the charset parameter is removed from the Content-Type header
    and the stored charset is cleared.  Anything else raises TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) are added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, str):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if self.has_key('Content-Transfer-Encoding'):
        return
    cte = charset.get_body_encoding()
    # The body encoding is first tried as a callable applied to this
    # message.  If calling it raises TypeError, it is used as the header
    # value instead.  Note that a TypeError raised from inside a callable
    # takes the same path.
    try:
        cte(self)
    except TypeError:
        self.add_header('Content-Transfer-Encoding', cte)
258
def get_charset(self):
    """Return the charset stored by set_charset() (None if there is none)."""
    return self._charset
Tim Peters8ac14952002-05-23 15:15:30 +0000263
Barry Warsawba925802001-09-23 03:17:28 +0000264 #
265 # MAPPING INTERFACE (partial)
266 #
def __len__(self):
    """Return the number of stored headers; duplicates are each counted."""
    headers = self._headers
    return len(headers)
270
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all the
    values matching a header field name.
    """
    value = self.get(name)
    return value
281
def __setitem__(self, name, val):
    """Add a header with the given field name and value.

    Note: the header is always appended; an existing header with the same
    field name is left in place.  Use __delitem__() first to delete any
    existing headers.
    """
    header = (name, val)
    self._headers.append(header)
289
def __delitem__(self, name):
    """Remove every header whose field name matches, ignoring case.

    Nothing is raised if no such header exists.
    """
    doomed = name.lower()
    # The header list is rebuilt rather than edited in place.
    self._headers = [(k, v) for k, v in self._headers
                     if k.lower() != doomed]
301
def __contains__(self, name):
    """Return true if a header with this field name exists, ignoring case."""
    wanted = name.lower()
    present = [k.lower() for k, v in self._headers]
    return wanted in present
Barry Warsawba925802001-09-23 03:17:28 +0000304
def has_key(self, name):
    """Return true if the message contains the header."""
    # A private sentinel is used so that a header whose stored value is
    # None still counts as present.
    sentinel = object()
    found = self.get(name, sentinel)
    return found is not sentinel
Barry Warsawba925802001-09-23 03:17:28 +0000309
def keys(self):
    """Return a new list of all the message's header field names.

    The names come in the order the headers are stored: the order they
    appeared in the original message, or were added to the message.  The
    list may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    return [field for field, _value in self._headers]
319
def values(self):
    """Return a new list of all the message's header values.

    The values come in the order the headers are stored: the order they
    appeared in the original message, or were added to the message.  The
    list may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    return [value for _field, value in self._headers]
329
def items(self):
    """Return all the message's header fields and values.

    The result is a shallow copy of the stored list of (name, value)
    pairs, in the order they appeared in the original message, or were
    added to the message.  It may contain duplicates.  Any fields deleted
    and re-inserted are always appended to the header list.
    """
    return list(self._headers)
339
def get(self, name, failobj=None):
    """Return the value of the first header matching name, ignoring case.

    This is like __getitem__(), except that failobj is returned instead of
    None when the field is missing.
    """
    wanted = name.lower()
    for field, value in self._headers:
        if field.lower() == wanted:
            return value
    return failobj
351
352 #
353 # Additional useful stuff
354 #
355
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    The name is matched ignoring case.  Values are in the order the
    headers are stored (the order they appeared in the original message,
    with any deleted and re-inserted fields at the end) and may contain
    duplicates.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    if matches:
        return matches
    return failobj
373
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out
    of the header when it is None).  Keyword arguments become additional
    parameters for the header field, with underscores in their names
    converted to dashes.  A parameter is formatted by _formatparam()
    (normally as key="value") unless its value is None, in which case only
    the key is added.  All pieces are joined with `; ' and the header is
    appended to the message.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    parts = []
    if _value is not None:
        parts.append(_value)
    for key, val in _params.items():
        key = key.replace('_', '-')
        if val is None:
            parts.append(key)
        else:
            parts.append(_formatparam(key, val))
    self._headers.append((_name, SEMISPACE.join(parts)))
395
def replace_header(self, _name, _value):
    """Replace the value of the first header matching _name.

    The name is matched ignoring case.  The header keeps its position in
    the header list and the spelling of its existing field name; only the
    value changes.  If no matching header is found, a KeyError carrying the
    lower-cased name is raised.
    """
    _name = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == _name:
            self._headers[index] = (field, _value)
            return
    raise KeyError(_name)
Barry Warsaw229727f2002-09-06 03:38:12 +0000410
Barry Warsawc1068642002-07-19 22:24:55 +0000411 #
Barry Warsawbb113862004-10-03 03:16:19 +0000412 # Deprecated methods. These will be removed in email 3.1.
Barry Warsawc1068642002-07-19 22:24:55 +0000413 #
414
def get_type(self, failobj=None):
    """Deprecated; use get_content_type() instead.

    Return the message's content type as a single lower-cased string of
    the form `maintype/subtype', taken from the part of the Content-Type
    header that precedes the first `;'.  If there is no Content-Type
    header in the message, failobj is returned (defaults to None).
    """
    warnings.warn('get_type() deprecated; use get_content_type()',
                  DeprecationWarning, 2)
    sentinel = object()
    value = self.get('content-type', sentinel)
    if value is sentinel:
        return failobj
    ctype = paramre.split(value)[0]
    return ctype.lower().strip()
Barry Warsawba925802001-09-23 03:17:28 +0000429
def get_main_type(self, failobj=None):
    """Deprecated; use get_content_maintype() instead.

    Return the part of get_type()'s result before the `/'.  failobj is
    returned when there is no content type or when it does not contain
    exactly one `/'.
    """
    warnings.warn('get_main_type() deprecated; use get_content_maintype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[0]
Barry Warsawba925802001-09-23 03:17:28 +0000441
def get_subtype(self, failobj=None):
    """Deprecated; use get_content_subtype() instead.

    Return the part of get_type()'s result after the `/'.  failobj is
    returned when there is no content type or when it does not contain
    exactly one `/'.
    """
    warnings.warn('get_subtype() deprecated; use get_content_subtype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[1]
453
454 #
455 # Use these three methods instead of the three above.
456 #
457
def get_content_type(self):
    """Return the message's content type.

    The returned string is lower-cased and of the form `maintype/subtype'.
    If there is no Content-Type header in the message, the default type as
    given by get_default_type() is returned.  If the header's type does
    not contain exactly one `/', `text/plain' is returned.  Since according
    to RFC 2045 messages always have a default type, this always returns a
    value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    value = self.get('content-type', sentinel)
    if value is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(value)[0].lower().strip()
    if ctype.count('/') == 1:
        return ctype
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return 'text/plain'
Barry Warsawc1068642002-07-19 22:24:55 +0000481
def get_content_maintype(self):
    """Return the message's main content type.

    This is the text before the `/' in the string returned by
    get_content_type().
    """
    maintype = self.get_content_type().split('/')[0]
    return maintype
490
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the text after the `/' in the string returned by
    get_content_type().
    """
    subtype = self.get_content_type().split('/')[1]
    return subtype
Barry Warsawba925802001-09-23 03:17:28 +0000499
def get_default_type(self):
    """Return the `default' content type stored on this message.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
508
def set_default_type(self, ctype):
    """Store ctype as the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is only kept on the message object; it is
    not written to the Content-Type header.
    """
    self._default_type = ctype
517
def _get_params_preserve(self, failobj, header):
    # Like get_params() but preserves the quoting of values.  BAW: should
    # this be part of the public interface?
    #
    # Returns failobj when the header is absent.  Otherwise the header
    # value is split by _parseparam() and each item becomes a (name, value)
    # pair; an item without `=' is a bare attribute whose value is the
    # empty string.  The pairs are passed through Utils.decode_params()
    # before being returned.
    sentinel = object()
    value = self.get(header, sentinel)
    if value is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(';' + value):
        if '=' in item:
            name, val = item.split('=', 1)
            name = name.strip()
            val = val.strip()
        else:
            # Must have been a bare attribute
            name = item.strip()
            val = ''
        pairs.append((name, val))
    return Utils.decode_params(pairs)
538
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if the header is missing.
    Optional header is the header to search instead of Content-Type.  If
    unquote is True, each value is unquoted; otherwise the values are
    returned with their quoting preserved.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(k, _unquotevalue(v)) for k, v in params]
Barry Warsawba925802001-09-23 03:17:28 +0000560
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return
    values, and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if not self.has_key(header):
        return failobj
    params = self._get_params_preserve(failobj, header)
    wanted = param.lower()
    for key, value in params:
        if key.lower() != wanted:
            continue
        if unquote:
            return _unquotevalue(value)
        return value
    return failobj
596
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  This includes a parameter that is
    present with an empty value (a bare attribute).  Parameter names are
    compared case insensitively.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified (and value is not already a tuple), the
    parameter will be encoded according to RFC 2231.  Optional language
    specifies the RFC 2231 language, defaulting to the empty string.  Both
    charset and language should be strings.

    The header is only deleted and re-added when its value actually
    changes; a re-added header goes to the end of the header list.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if not self.has_key(header) and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # Test for existence with a sentinel rather than by truth value, so
    # that a parameter which is present but empty is replaced below
    # instead of being appended a second time.
    missing = object()
    if self.get_param(param, missing, header=header) is missing:
        new_item = _formatparam(param, value, requote)
        if not ctype:
            ctype = new_item
        else:
            ctype = SEMISPACE.join([ctype, new_item])
    else:
        wanted = param.lower()
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == wanted:
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            if not ctype:
                ctype = append_param
            else:
                ctype = SEMISPACE.join([ctype, append_param])
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
643 self[header] = ctype
644
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt from its remaining parameters, so the named
    parameter and its value both disappear.  Values are re-quoted as
    necessary unless requote is False.  Use the optional header argument
    to operate on a header other than Content-Type.  Nothing happens when
    the header is absent.
    """
    if not self.has_key(header):
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            # This is the parameter being removed; leave it out.
            continue
        formatted = _formatparam(name, val, requote)
        if rebuilt:
            rebuilt = SEMISPACE.join([rebuilt, formatted])
        else:
            rebuilt = formatted
    # Rewrite the header only if dropping the parameter changed it.
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
666
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string of the form "maintype/subtype" (exactly one
    slash); anything else raises ValueError.

    The header is replaced by the new type and every parameter it already
    carried is set again on the new header.  If requote is False the
    parameters' existing quoting is left as is; otherwise they are quoted
    (the default).

    A different header may be named with the header argument.  Whenever
    the header is Content-Type, the MIME-Version header is also reset to
    "1.0".
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if self.has_key(header):
        # Capture the old parameters before the header is thrown away.
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The first entry is the old type itself, so skip it.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
698
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The value comes from the `filename' parameter of the
    Content-Disposition header.  It is passed through
    Utils.collapse_rfc2231_value() and stripped of surrounding whitespace.
    When the parameter cannot be found, failobj is returned instead.
    """
    # A fresh object() can never be a real parameter value, so it reliably
    # signals "not found" even for empty or None-like values.
    sentinel = object()
    raw = self.get_param('filename', sentinel, 'content-disposition')
    if raw is not sentinel:
        return Utils.collapse_rfc2231_value(raw).strip()
    return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000710
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The value comes from the `boundary' parameter of the Content-Type
    header.  It is passed through Utils.collapse_rfc2231_value() and
    stripped of surrounding whitespace.  When the parameter cannot be
    found, failobj is returned instead.
    """
    # A fresh object() can never be a real parameter value, so it reliably
    # signals "not found".
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        return Utils.collapse_rfc2231_value(raw).strip()
    return failobj
Barry Warsawba925802001-09-23 03:17:28 +0000722
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and adding a fresh one with
    add_header(), this rewrites the header where it stands, so the
    Content-Type header keeps its position among the message's headers
    and its other parameters keep their order.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise Errors.HeaderParseError('No Content-Type header found')

    # The boundary is always written inside double quotes.
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for name, val in params:
        if name.lower() == 'boundary':
            name, val = 'boundary', quoted
            replaced = True
        updated.append((name, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))

    # Render the new header value once; a parameter whose value is the
    # empty string is written as a bare name.
    rendered = []
    for name, val in updated:
        if val == '':
            rendered.append(name)
        else:
            rendered.append('%s=%s' % (name, val))
    new_value = SEMISPACE.join(rendered)

    # Replace the existing Content-Type header with the new value, leaving
    # every other header untouched and in place.
    rebuilt = []
    for hname, hvalue in self._headers:
        if hname.lower() == 'content-type':
            hvalue = new_value
        rebuilt.append((hname, hvalue))
    self._headers = rebuilt
767
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The returned string is always coerced to lower case.  If there is no
    Content-Type header, or if that header has no charset parameter,
    failobj is returned.  failobj is also returned when the parameter is
    RFC 2231 encoded but cannot be decoded to us-ascii (unknown encoding
    name, or characters outside the declared encoding or outside ascii).
    """
    missing = object()
    charset = self.get_param('charset', missing)
    if charset is missing:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        pcharset = charset[0] or 'us-ascii'
        try:
            # LookupError: the declared encoding is unknown to Python.
            # UnicodeError: the text doesn't fit the declared encoding, or
            # the decoded name isn't pure ascii.  Either way the header is
            # malformed, so report "no usable charset" instead of letting
            # a codec exception escape from a simple getter.
            charset = unicode(charset[2], pcharset).encode('us-ascii')
        except (LookupError, UnicodeError):
            return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
Barry Warsaw15aefa92002-09-26 17:19:34 +0000785
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The list has one entry for every part yielded by self.walk(), in walk
    order.  Each entry is whatever that part's get_content_charset()
    returns when called with failobj: the part's charset, or failobj
    (None by default) when the part has none.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
Barry Warsaw5d840532004-05-09 03:44:55 +0000803
804 # I.e. def walk(self): ...
805 from email.Iterators import walk