Blame - Lib/email/message.py - platform/external/python/cpython2

blob: 8d68c093cc95f608bc269d67a8f9ba313e6ffcff [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
				21	SEMISPACE = '; '
				22
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	23	# Regular expression that matches `special' characters in parameters, the
Mark Dickinson	934896d	2009-02-21 20:59:32 +0000	[diff] [blame]	24	# existence of which force quoting of the parameter value.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	25	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				26
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	27	# How to figure out if we are processing strings that come from a byte
				28	# source with undecodable characters.
				29	_has_surrogates = re.compile(
				30	'([^\ud800-\udbff]\|\A)[\udc00-\udfff]([^\udc00-\udfff]\|\Z)').search
				31
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	32
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	33	# Helper functions
def _sanitize_surrogates(value):
    # Used to clean header values before they escape as strings.  When a str
    # value contains surrogates, the original bytes are recovered, decoded
    # again as ASCII, and every byte that was not ASCII ends up as '?'.
    if isinstance(value, str) and _has_surrogates(value):
        raw = value.encode('ascii', 'surrogateescape')
        return raw.decode('ascii', 'replace').replace('\ufffd', '?')
    # Non-str values (Header objects) and clean strings are returned as-is.
    return value
				46
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	47	def _splitparam(param):
				48	# Split header parameters. BAW: this may be too simple. It isn't
				49	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				50	# found in the wild. We may eventually need a full fledged parser
				51	# eventually.
				52	a, sep, b = param.partition(';')
				53	if not sep:
				54	return a.strip(), None
				55	return a.strip(), b.strip()
				56
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	57	def _formatparam(param, value=None, quote=True):
				58	"""Convenience function to format and return a key=value pair.
				59
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	60	This will quote the value if needed or if quote is true. If value is a
				61	three tuple (charset, language, value), it will be encoded according
				62	to RFC2231 rules. If it contains non-ascii characters it will likewise
				63	be encoded according to RFC2231 rules, using the utf-8 charset and
				64	a null language.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	65	"""
				66	if value is not None and len(value) > 0:
				67	# A tuple is used for RFC 2231 encoded parameter values where items
				68	# are (charset, language, value). charset is a string, not a Charset
				69	# instance.
				70	if isinstance(value, tuple):
				71	# Encode as per RFC 2231
				72	param += '*'
				73	value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	74	else:
				75	try:
				76	value.encode('ascii')
				77	except UnicodeEncodeError:
				78	param += '*'
				79	value = utils.encode_rfc2231(value, 'utf-8', '')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	80	# BAW: Please check this. I think that if quote is set it should
				81	# force quoting even if not necessary.
				82	if quote or tspecials.search(value):
				83	return '%s="%s"' % (param, utils.quote(value))
				84	else:
				85	return '%s=%s' % (param, value)
				86	else:
				87	return param
				88
				89	def _parseparam(s):
				90	plist = []
				91	while s[:1] == ';':
				92	s = s[1:]
				93	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	94	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	95	end = s.find(';', end + 1)
				96	if end < 0:
				97	end = len(s)
				98	f = s[:end]
				99	if '=' in f:
				100	i = f.index('=')
				101	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				102	plist.append(f.strip())
				103	s = s[end:]
				104	return plist
				105
				106
				107	def _unquotevalue(value):
				108	# This is different than utils.collapse_rfc2231_value() because it doesn't
				109	# try to convert the value to a unicode. Message.get_param() and
				110	# Message.get_params() are both currently defined to return the tuple in
				111	# the face of RFC 2231 parameters.
				112	if isinstance(value, tuple):
				113	return value[0], value[1], utils.unquote(value[2])
				114	else:
				115	return utils.unquote(value)
				116
				117
				118
				119	class Message:
				120	"""Basic message object.
				121
				122	A message object is defined as something that has a bunch of RFC 2822
				123	headers and a payload. It may optionally have an envelope header
				124	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				125	multipart or a message/rfc822), then the payload is a list of Message
				126	objects, otherwise it is a string.
				127
				128	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	129	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	130	do in fact appear multiple times (e.g. Received) and for those headers,
				131	you must use the explicit API to set or get all the headers. Not all of
				132	the mapping methods are implemented.
				133	"""
				134	def __init__(self):
				135	self._headers = []
				136	self._unixfrom = None
				137	self._payload = None
				138	self._charset = None
				139	# Defaults for multipart messages
				140	self.preamble = self.epilogue = None
				141	self.defects = []
				142	# Default content type
				143	self._default_type = 'text/plain'
				144
    def __str__(self):
        """Render the whole message as a string.

        Same as calling as_string() with its default arguments.
        """
        text = self.as_string()
        return text
				150
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Flatten the message into a single string and return it.

        Optional `unixfrom', when True, means include the Unix From_ envelope
        header.  `maxheaderlen' is passed through to the Generator unchanged.

        This is a convenience method; it always builds its Generator with
        mangle_from_=False.  For more flexibility, use the flatten() method
        of a Generator instance.
        """
        from email.generator import Generator
        buffer = StringIO()
        Generator(buffer, mangle_from_=False,
                  maxheaderlen=maxheaderlen).flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()
				166
				167	def is_multipart(self):
				168	"""Return True if the message consists of multiple parts."""
				169	return isinstance(self._payload, list)
				170
				171	#
				172	# Unix From_ line
				173	#
				174	def set_unixfrom(self, unixfrom):
				175	self._unixfrom = unixfrom
				176
				177	def get_unixfrom(self):
				178	return self._unixfrom
				179
				180	#
				181	# Payload manipulation.
				182	#
				183	def attach(self, payload):
				184	"""Add the given payload to the current payload.
				185
				186	The current payload will always be a list of objects after this method
				187	is called. If you want to set the payload to a scalar object, use
				188	set_payload() instead.
				189	"""
				190	if self._payload is None:
				191	self._payload = [payload]
				192	else:
				193	self._payload.append(payload)
				194
				195	def get_payload(self, i=None, decode=False):
				196	"""Return a reference to the payload.
				197
				198	The payload will either be a list object or a string. If you mutate
				199	the list object, you modify the message's payload in place. Optional
				200	i returns that index into the payload.
				201
				202	Optional decode is a flag indicating whether the payload should be
				203	decoded or not, according to the Content-Transfer-Encoding header
				204	(default is False).
				205
				206	When True and the message is not a multipart, the payload will be
				207	decoded if this header's value is `quoted-printable' or `base64'. If
				208	some other encoding is used, or the header is missing, or if the
				209	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				210	payload is returned as-is.
				211
				212	If the message is a multipart and the decode flag is True, then None
				213	is returned.
				214	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	215	# Here is the logic table for this code, based on the email5.0.0 code:
				216	# i decode is_multipart result
				217	# ------ ------ ------------ ------------------------------
				218	# None True True None
				219	# i True True None
				220	# None False True _payload (a list)
				221	# i False True _payload element i (a Message)
				222	# i False False error (not a list)
				223	# i True False error (not a list)
				224	# None False False _payload
				225	# None True False _payload decoded (bytes)
				226	# Note that Barry planned to factor out the 'decode' case, but that
				227	# isn't so easy now that we handle the 8 bit data, which needs to be
				228	# converted in both the decode and non-decode path.
				229	if self.is_multipart():
				230	if decode:
				231	return None
				232	if i is None:
				233	return self._payload
				234	else:
				235	return self._payload[i]
				236	# For backward compatibility, Use isinstance and this error message
				237	# instead of the more logical is_multipart test.
				238	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	239	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	240	payload = self._payload
				241	cte = self.get('content-transfer-encoding', '').lower()
				242	# payload can be bytes here, (I wonder if that is actually a bug?)
				243	if isinstance(payload, str):
				244	if _has_surrogates(payload):
				245	bpayload = payload.encode('ascii', 'surrogateescape')
				246	if not decode:
				247	try:
				248	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				249	except LookupError:
				250	payload = bpayload.decode('ascii', 'replace')
				251	elif decode:
				252	try:
				253	bpayload = payload.encode('ascii')
				254	except UnicodeError:
				255	# This won't happen for RFC compliant messages (messages
				256	# containing only ASCII codepoints in the unicode input).
				257	# If it does happen, turn the string into bytes in a way
				258	# guaranteed not to fail.
				259	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	260	if not decode:
				261	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	262	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	263	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	264	elif cte == 'base64':
				265	try:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	266	return base64.b64decode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	267	except binascii.Error:
				268	# Incorrect padding
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	269	return bpayload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	270	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	271	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	272	out_file = BytesIO()
				273	try:
				274	uu.decode(in_file, out_file, quiet=True)
				275	return out_file.getvalue()
				276	except uu.Error:
				277	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	278	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	279	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	280	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	281	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	282
				283	def set_payload(self, payload, charset=None):
				284	"""Set the payload to the given value.
				285
				286	Optional charset sets the message's default character set. See
				287	set_charset() for details.
				288	"""
				289	self._payload = payload
				290	if charset is not None:
				291	self.set_charset(charset)
				292
				293	def set_charset(self, charset):
				294	"""Set the charset of the payload to a given character set.
				295
				296	charset can be a Charset instance, a string naming a character set, or
				297	None. If it is a string it will be converted to a Charset instance.
				298	If charset is None, the charset parameter will be removed from the
				299	Content-Type field. Anything else will generate a TypeError.
				300
				301	The message will be assumed to be of type text/* encoded with
				302	charset.input_charset. It will be converted to charset.output_charset
				303	and encoded properly, if needed, when generating the plain text
				304	representation of the message. MIME headers (MIME-Version,
				305	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	306	"""
				307	if charset is None:
				308	self.del_param('charset')
				309	self._charset = None
				310	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	311	if not isinstance(charset, Charset):
				312	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	313	self._charset = charset
				314	if 'MIME-Version' not in self:
				315	self.add_header('MIME-Version', '1.0')
				316	if 'Content-Type' not in self:
				317	self.add_header('Content-Type', 'text/plain',
				318	charset=charset.get_output_charset())
				319	else:
				320	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	321	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	322	self._payload = charset.body_encode(self._payload)
				323	if 'Content-Transfer-Encoding' not in self:
				324	cte = charset.get_body_encoding()
				325	try:
				326	cte(self)
				327	except TypeError:
				328	self._payload = charset.body_encode(self._payload)
				329	self.add_header('Content-Transfer-Encoding', cte)
				330
				331	def get_charset(self):
				332	"""Return the Charset instance associated with the message's payload.
				333	"""
				334	return self._charset
				335
				336	#
				337	# MAPPING INTERFACE (partial)
				338	#
				339	def __len__(self):
				340	"""Return the total number of headers, including duplicates."""
				341	return len(self._headers)
				342
				343	def __getitem__(self, name):
				344	"""Get a header value.
				345
				346	Return None if the header is missing instead of raising an exception.
				347
				348	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	349	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	350	the values matching a header field name.
				351	"""
				352	return self.get(name)
				353
				354	def __setitem__(self, name, val):
				355	"""Set the value of a header.
				356
				357	Note: this does not overwrite an existing header with the same field
				358	name. Use __delitem__() first to delete any existing headers.
				359	"""
				360	self._headers.append((name, val))
				361
				362	def __delitem__(self, name):
				363	"""Delete all occurrences of a header, if present.
				364
				365	Does not raise an exception if the header is missing.
				366	"""
				367	name = name.lower()
				368	newheaders = []
				369	for k, v in self._headers:
				370	if k.lower() != name:
				371	newheaders.append((k, v))
				372	self._headers = newheaders
				373
				374	def __contains__(self, name):
				375	return name.lower() in [k.lower() for k, v in self._headers]
				376
				377	def __iter__(self):
				378	for field, value in self._headers:
				379	yield field
				380
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	381	def keys(self):
				382	"""Return a list of all the message's header field names.
				383
				384	These will be sorted in the order they appeared in the original
				385	message, or were added to the message, and may contain duplicates.
				386	Any fields deleted and re-inserted are always appended to the header
				387	list.
				388	"""
				389	return [k for k, v in self._headers]
				390
				391	def values(self):
				392	"""Return a list of all the message's header values.
				393
				394	These will be sorted in the order they appeared in the original
				395	message, or were added to the message, and may contain duplicates.
				396	Any fields deleted and re-inserted are always appended to the header
				397	list.
				398	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	399	return [_sanitize_surrogates(v) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	400
				401	def items(self):
				402	"""Get all the message's header fields and values.
				403
				404	These will be sorted in the order they appeared in the original
				405	message, or were added to the message, and may contain duplicates.
				406	Any fields deleted and re-inserted are always appended to the header
				407	list.
				408	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	409	return [(k, _sanitize_surrogates(v)) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	410
				411	def get(self, name, failobj=None):
				412	"""Get a header value.
				413
				414	Like __getitem__() but return failobj instead of None when the field
				415	is missing.
				416	"""
				417	name = name.lower()
				418	for k, v in self._headers:
				419	if k.lower() == name:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	420	return _sanitize_surrogates(v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	421	return failobj
				422
				423	#
				424	# Additional useful stuff
				425	#
				426
				427	def get_all(self, name, failobj=None):
				428	"""Return a list of all the values for the named field.
				429
				430	These will be sorted in the order they appeared in the original
				431	message, and may contain duplicates. Any fields deleted and
				432	re-inserted are always appended to the header list.
				433
				434	If no such fields exist, failobj is returned (defaults to None).
				435	"""
				436	values = []
				437	name = name.lower()
				438	for k, v in self._headers:
				439	if k.lower() == name:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	440	values.append(_sanitize_surrogates(v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	441	if not values:
				442	return failobj
				443	return values
				444
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value its main value (left out
        of the header text when None).  Keyword arguments become additional
        parameters of the header field, with underscores in their names
        converted to dashes.  A parameter whose value is None is added as a
        bare key; any other value is formatted by _formatparam().  A
        parameter value containing non-ASCII characters can be given as a
        three-tuple of (charset, language, value), in which case it is
        encoded according to RFC2231 rules; given as a plain string it is
        encoded using the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        pieces = [] if _value is None else [_value]
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            if val is None:
                pieces.append(dashed)
            else:
                pieces.append(_formatparam(dashed, val))
        self._headers.append((_name, SEMISPACE.join(pieces)))
				474
				475	def replace_header(self, _name, _value):
				476	"""Replace a header.
				477
				478	Replace the first matching header found in the message, retaining
				479	header order and case. If no matching header was found, a KeyError is
				480	raised.
				481	"""
				482	_name = _name.lower()
				483	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				484	if k.lower() == _name:
				485	self._headers[i] = (k, _value)
				486	break
				487	else:
				488	raise KeyError(_name)
				489
				490	#
				491	# Use these three methods instead of the three above.
				492	#
				493
				494	def get_content_type(self):
				495	"""Return the message's content type.
				496
				497	The returned string is coerced to lower case of the form
				498	`maintype/subtype'. If there was no Content-Type header in the
				499	message, the default type as given by get_default_type() will be
				500	returned. Since according to RFC 2045, messages always have a default
				501	type this will always return a value.
				502
				503	RFC 2045 defines a message's default type to be text/plain unless it
				504	appears inside a multipart/digest container, in which case it would be
				505	message/rfc822.
				506	"""
				507	missing = object()
				508	value = self.get('content-type', missing)
				509	if value is missing:
				510	# This should have no parameters
				511	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	512	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	513	# RFC 2045, section 5.2 says if its invalid, use text/plain
				514	if ctype.count('/') != 1:
				515	return 'text/plain'
				516	return ctype
				517
				518	def get_content_maintype(self):
				519	"""Return the message's main content type.
				520
				521	This is the `maintype' part of the string returned by
				522	get_content_type().
				523	"""
				524	ctype = self.get_content_type()
				525	return ctype.split('/')[0]
				526
				527	def get_content_subtype(self):
				528	"""Returns the message's sub-content type.
				529
				530	This is the `subtype' part of the string returned by
				531	get_content_type().
				532	"""
				533	ctype = self.get_content_type()
				534	return ctype.split('/')[1]
				535
				536	def get_default_type(self):
				537	"""Return the `default' content type.
				538
				539	Most messages have a default content type of text/plain, except for
				540	messages that are subparts of multipart/digest containers. Such
				541	subparts have a default content type of message/rfc822.
				542	"""
				543	return self._default_type
				544
				545	def set_default_type(self, ctype):
				546	"""Set the `default' content type.
				547
				548	ctype should be either "text/plain" or "message/rfc822", although this
				549	is not enforced. The default content type is not stored in the
				550	Content-Type header.
				551	"""
				552	self._default_type = ctype
				553
				554	def _get_params_preserve(self, failobj, header):
				555	# Like get_params() but preserves the quoting of values. BAW:
				556	# should this be part of the public interface?
				557	missing = object()
				558	value = self.get(header, missing)
				559	if value is missing:
				560	return failobj
				561	params = []
				562	for p in _parseparam(';' + value):
				563	try:
				564	name, val = p.split('=', 1)
				565	name = name.strip()
				566	val = val.strip()
				567	except ValueError:
				568	# Must have been a bare attribute
				569	name = p.strip()
				570	val = ''
				571	params.append((name, val))
				572	params = utils.decode_params(params)
				573	return params
				574
				575	def get_params(self, failobj=None, header='content-type', unquote=True):
				576	"""Return the message's Content-Type parameters, as a list.
				577
				578	The elements of the returned list are 2-tuples of key/value pairs, as
				579	split on the `=' sign. The left hand side of the `=' is the key,
				580	while the right hand side is the value. If there is no `=' sign in
				581	the parameter the value is the empty string. The value is as
				582	described in the get_param() method.
				583
				584	Optional failobj is the object to return if there is no Content-Type
				585	header. Optional header is the header to search instead of
				586	Content-Type. If unquote is True, the value is unquoted.
				587	"""
				588	missing = object()
				589	params = self._get_params_preserve(missing, header)
				590	if params is missing:
				591	return failobj
				592	if unquote:
				593	return [(k, _unquotevalue(v)) for k, v in params]
				594	else:
				595	return params
				596
				597	def get_param(self, param, failobj=None, header='content-type',
				598	unquote=True):
				599	"""Return the parameter value if found in the Content-Type header.
				600
				601	Optional failobj is the object to return if there is no Content-Type
				602	header, or the Content-Type header has no such parameter. Optional
				603	header is the header to search instead of Content-Type.
				604
				605	Parameter keys are always compared case insensitively. The return
				606	value can either be a string, or a 3-tuple if the parameter was RFC
				607	2231 encoded. When it's a 3-tuple, the elements of the value are of
				608	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				609	LANGUAGE can be None, in which case you should consider VALUE to be
				610	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				611
				612	Your application should be prepared to deal with 3-tuple return
				613	values, and can convert the parameter to a Unicode string like so:
				614
				615	param = msg.get_param('foo')
				616	if isinstance(param, tuple):
				617	param = unicode(param[2], param[0] or 'us-ascii')
				618
				619	In any case, the parameter value (either the returned string, or the
				620	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				621	to False.
				622	"""
				623	if header not in self:
				624	return failobj
				625	for k, v in self._get_params_preserve(failobj, header):
				626	if k.lower() == param.lower():
				627	if unquote:
				628	return _unquotevalue(v)
				629	else:
				630	return v
				631	return failobj
				632
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.  Note that a parameter whose current
        value is empty is treated like one that does not exist yet.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should be
        strings.
        """
        if charset and not isinstance(value, tuple):
            value = (charset, language, value)

        if header.lower() == 'content-type' and header not in self:
            current = 'text/plain'
        else:
            current = self.get(header)

        if self.get_param(param, header=header):
            # The parameter is already there: rebuild the whole header value,
            # substituting the new value at the parameter's position.
            rebuilt = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    piece = _formatparam(param, value, requote)
                else:
                    piece = _formatparam(old_param, old_value, requote)
                rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
            current = rebuilt
        else:
            # New parameter: append it to whatever the header holds now.
            piece = _formatparam(param, value, requote)
            current = SEMISPACE.join([current, piece]) if current else piece

        if current != self.get(header):
            del self[header]
            self[header] = current
				680
				681	def del_param(self, param, header='content-type', requote=True):
				682	"""Remove the given parameter completely from the Content-Type header.
				683
				684	The header will be re-written in place without the parameter or its
				685	value. All values will be quoted as necessary unless requote is
				686	False. Optional header specifies an alternative to the Content-Type
				687	header.
				688	"""
				689	if header not in self:
				690	return
				691	new_ctype = ''
				692	for p, v in self.get_params(header=header, unquote=requote):
				693	if p.lower() != param.lower():
				694	if not new_ctype:
				695	new_ctype = _formatparam(p, v, requote)
				696	else:
				697	new_ctype = SEMISPACE.join([new_ctype,
				698	_formatparam(p, v, requote)])
				699	if new_ctype != self.get(header):
				700	del self[header]
				701	self[header] = new_ctype
				702
    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype" (exactly one
        slash); otherwise a ValueError describing the offending value is
        raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted
        (the default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, a MIME-Version header is always
        (re)written as well.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be in the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            # Nothing to preserve: just store the new type.
            self[header] = type
            return
        # Capture the existing parameters before the header is replaced.
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)
				734
    def get_filename(self, failobj=None):
        """Return the payload's filename, or failobj when none is given.

        The `filename' parameter of the Content-Disposition header is tried
        first; if it is absent, the `name' parameter of the Content-Type
        header is used instead.  The value found is collapsed with
        utils.collapse_rfc2231_value() and stripped of surrounding
        whitespace.
        """
        # A unique sentinel, so any real value can be told apart from
        # "parameter not present".
        absent = object()
        candidates = (('filename', 'content-disposition'),
                      ('name', 'content-type'))
        for param_name, header_name in candidates:
            found = self.get_param(param_name, absent, header_name)
            if found is not absent:
                return utils.collapse_rfc2231_value(found).strip()
        return failobj
				750
    def get_boundary(self, failobj=None):
        """Return the multipart boundary, or failobj when none is given.

        The value comes from the `boundary' parameter of the Content-Type
        header.  It is collapsed with utils.collapse_rfc2231_value() and has
        trailing whitespace removed.
        """
        absent = object()
        found = self.get_param('boundary', absent)
        if found is not absent:
            # RFC 2046 says that boundaries may begin but not end in w/s
            return utils.collapse_rfc2231_value(found).rstrip()
        return failobj
				763
				764	def set_boundary(self, boundary):
				765	"""Set the boundary parameter in Content-Type to 'boundary'.
				766
				767	This is subtly different than deleting the Content-Type header and
				768	adding a new one with a new boundary parameter via add_header(). The
				769	main difference is that using the set_boundary() method preserves the
				770	order of the Content-Type header in the original message.
				771
				772	HeaderParseError is raised if the message has no Content-Type header.
				773	"""
				774	missing = object()
				775	params = self._get_params_preserve(missing, 'content-type')
				776	if params is missing:
				777	# There was no Content-Type header, and we don't know what type
				778	# to set it to, so raise an exception.
				779	raise errors.HeaderParseError('No Content-Type header found')
				780	newparams = []
				781	foundp = False
				782	for pk, pv in params:
				783	if pk.lower() == 'boundary':
				784	newparams.append(('boundary', '"%s"' % boundary))
				785	foundp = True
				786	else:
				787	newparams.append((pk, pv))
				788	if not foundp:
				789	# The original Content-Type header had no boundary attribute.
				790	# Tack one on the end. BAW: should we raise an exception
				791	# instead???
				792	newparams.append(('boundary', '"%s"' % boundary))
				793	# Replace the existing Content-Type header with the new value
				794	newheaders = []
				795	for h, v in self._headers:
				796	if h.lower() == 'content-type':
				797	parts = []
				798	for k, v in newparams:
				799	if v == '':
				800	parts.append(k)
				801	else:
				802	parts.append('%s=%s' % (k, v))
				803	newheaders.append((h, SEMISPACE.join(parts)))
				804
				805	else:
				806	newheaders.append((h, v))
				807	self._headers = newheaders
				808
    def get_content_charset(self, failobj=None):
        """Return the Content-Type header's charset parameter, lower-cased.

        failobj is returned when the parameter cannot be found, and also
        when the charset name contains characters outside us-ascii.
        """
        absent = object()
        value = self.get_param('charset', absent)
        if value is absent:
            return failobj
        if isinstance(value, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            declared = value[0] or 'us-ascii'
            encoded_text = value[2]
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                value = str(encoded_text.encode('raw-unicode-escape'),
                            declared)
            except (LookupError, UnicodeError):
                # Could not decode: fall back to the text as it was given.
                value = encoded_text
        # charset characters must be in us-ascii range
        try:
            value.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
				838
    def get_charsets(self, failobj=None):
        """Return the charsets of this message and of every part under it.

        The parts are visited in the order produced by self.walk(), and one
        entry is collected per part: whatever that part's
        get_content_charset(failobj) returns.  That is either the charset
        name or failobj (None by default) when the part supplies no usable
        charset.
        """
        charsets = []
        for part in self.walk():
            charsets.append(part.get_content_charset(failobj))
        return charsets
				856
				857	# I.e. def walk(self): ...
				858	from email.iterators import walk