Blame - Lib/email/message.py - platform/external/python/cpython2

blob: d2483cacf69a9850d79655e9cd375ac157f18f05 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
				21	SEMISPACE = '; '
				22
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	23	# Regular expression that matches `special' characters in parameters, the
Mark Dickinson	934896d	2009-02-21 20:59:32 +0000	[diff] [blame]	24	# existence of which force quoting of the parameter value.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	25	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				26
# How to figure out if we are processing strings that come from a byte
# source with undecodable characters: look for a low surrogate (the code
# points produced by the 'surrogateescape' error handler) that is not the
# second half of a surrogate pair.  The two groups use regex alternation
# ('|'); \A and \Z are written with doubled backslashes so they reach the
# regex engine as anchors instead of being invalid string escapes.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search


# Helper functions
def _sanitize_surrogates(value):
    """Return value with surrogate-escaped bytes replaced by '?'.

    If the value contains surrogates, re-decode and replace the original
    non-ascii bytes with '?'s.  Used to sanitize header values before
    letting them escape as strings.  Anything that is not a str (e.g. a
    Header object) is returned unchanged, as is a str without surrogates.
    """
    if not isinstance(value, str):
        # Header object
        return value
    if not _has_surrogates(value):
        return value
    # Recover the bytes the surrogates stand for, then decode them again so
    # that every undecodable byte becomes U+FFFD, which is mapped to '?'.
    original_bytes = value.encode('ascii', 'surrogateescape')
    return original_bytes.decode('ascii', 'replace').replace('\ufffd', '?')
				46
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns a 2-tuple (value, parameters), both stripped of surrounding
    whitespace.  parameters is None when the value contains no ';'.
    """
    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    # compliant, but it catches most headers found in the wild.  We may
    # eventually need a full fledged parser.
    main, semicolon, rest = param.partition(';')
    rest = rest.strip() if semicolon else None
    return main.strip(), rest
				56
def _formatparam(param, value=None, quote=True):
    """Format a parameter as `key=value', or as the bare key.

    When value is None or empty, param is returned unchanged.  A three-tuple
    (charset, language, value) is encoded according to RFC 2231 rules; a
    string containing non-ascii characters is likewise RFC 2231 encoded,
    using the utf-8 charset and an empty language.  Any other value is
    quoted when quote is true or when it contains a tspecials character.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.  RFC 2231 encoded values are never quoted, per RFC.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
				90
				91	def _parseparam(s):
				92	plist = []
				93	while s[:1] == ';':
				94	s = s[1:]
				95	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	96	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	97	end = s.find(';', end + 1)
				98	if end < 0:
				99	end = len(s)
				100	f = s[:end]
				101	if '=' in f:
				102	i = f.index('=')
				103	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				104	plist.append(f.strip())
				105	s = s[end:]
				106	return plist
				107
				108
				109	def _unquotevalue(value):
				110	# This is different than utils.collapse_rfc2231_value() because it doesn't
				111	# try to convert the value to a unicode. Message.get_param() and
				112	# Message.get_params() are both currently defined to return the tuple in
				113	# the face of RFC 2231 parameters.
				114	if isinstance(value, tuple):
				115	return value[0], value[1], utils.unquote(value[2])
				116	else:
				117	return utils.unquote(value)
				118
				119
				120
				121	class Message:
				122	"""Basic message object.
				123
				124	A message object is defined as something that has a bunch of RFC 2822
				125	headers and a payload. It may optionally have an envelope header
				126	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				127	multipart or a message/rfc822), then the payload is a list of Message
				128	objects, otherwise it is a string.
				129
				130	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	131	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	132	do in fact appear multiple times (e.g. Received) and for those headers,
				133	you must use the explicit API to set or get all the headers. Not all of
				134	the mapping methods are implemented.
				135	"""
				136	def __init__(self):
				137	self._headers = []
				138	self._unixfrom = None
				139	self._payload = None
				140	self._charset = None
				141	# Defaults for multipart messages
				142	self.preamble = self.epilogue = None
				143	self.defects = []
				144	# Default content type
				145	self._default_type = 'text/plain'
				146
				147	def __str__(self):
				148	"""Return the entire formatted message as a string.
				149	This includes the headers, body, and envelope header.
				150	"""
				151	return self.as_string()
				152
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.  Optional `maxheaderlen' is handed to the Generator.

        This is a convenience method and may not generate the message exactly
        as you intend; note that the Generator used here is created with
        mangle_from_=False.  For more flexibility, use the flatten() method
        of a Generator instance.
        """
        from email.generator import Generator
        buffer = StringIO()
        generator = Generator(buffer, mangle_from_=False,
                              maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()
				168
				169	def is_multipart(self):
				170	"""Return True if the message consists of multiple parts."""
				171	return isinstance(self._payload, list)
				172
				173	#
				174	# Unix From_ line
				175	#
				176	def set_unixfrom(self, unixfrom):
				177	self._unixfrom = unixfrom
				178
				179	def get_unixfrom(self):
				180	return self._unixfrom
				181
				182	#
				183	# Payload manipulation.
				184	#
				185	def attach(self, payload):
				186	"""Add the given payload to the current payload.
				187
				188	The current payload will always be a list of objects after this method
				189	is called. If you want to set the payload to a scalar object, use
				190	set_payload() instead.
				191	"""
				192	if self._payload is None:
				193	self._payload = [payload]
				194	else:
				195	self._payload.append(payload)
				196
    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or one
        of the uuencode spellings, and bytes are returned.  If some other
        encoding is used, or the header is missing, or if the payload has
        bogus data (i.e. bogus base64 or uuencoded data), the payload is
        returned as-is (as bytes when it was a string).  A payload that is
        neither a string nor bytes (e.g. None) is returned unchanged.

        If the message is a multipart and the decode flag is True, then None
        is returned.

        Raises TypeError when i is given but the payload is not a list.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # The stored header value may be a Header object rather than a str,
        # so stringify it before normalizing its case.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        if isinstance(payload, str):
            if _has_surrogates(payload):
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    try:
                        payload = bpayload.decode(
                            self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII codepoints in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        if not decode:
            return payload
        if isinstance(payload, bytes):
            # A bytes payload is already in the form the decoders consume.
            bpayload = payload
        elif not isinstance(payload, str):
            # None (or some other object): there is nothing to decode, so
            # hand it back as-is instead of failing on an unbound bpayload.
            return payload
        if cte == 'quoted-printable':
            return utils._qdecode(bpayload)
        elif cte == 'base64':
            try:
                return base64.b64decode(bpayload)
            except binascii.Error:
                # Incorrect padding
                return bpayload
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # No (known) transfer encoding: str payloads come back as bytes,
        # bytes payloads come back unchanged.
        return bpayload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	284
				285	def set_payload(self, payload, charset=None):
				286	"""Set the payload to the given value.
				287
				288	Optional charset sets the message's default character set. See
				289	set_charset() for details.
				290	"""
				291	self._payload = payload
				292	if charset is not None:
				293	self.set_charset(charset)
				294
				295	def set_charset(self, charset):
				296	"""Set the charset of the payload to a given character set.
				297
				298	charset can be a Charset instance, a string naming a character set, or
				299	None. If it is a string it will be converted to a Charset instance.
				300	If charset is None, the charset parameter will be removed from the
				301	Content-Type field. Anything else will generate a TypeError.
				302
				303	The message will be assumed to be of type text/* encoded with
				304	charset.input_charset. It will be converted to charset.output_charset
				305	and encoded properly, if needed, when generating the plain text
				306	representation of the message. MIME headers (MIME-Version,
				307	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	308	"""
				309	if charset is None:
				310	self.del_param('charset')
				311	self._charset = None
				312	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	313	if not isinstance(charset, Charset):
				314	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	315	self._charset = charset
				316	if 'MIME-Version' not in self:
				317	self.add_header('MIME-Version', '1.0')
				318	if 'Content-Type' not in self:
				319	self.add_header('Content-Type', 'text/plain',
				320	charset=charset.get_output_charset())
				321	else:
				322	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	323	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	324	self._payload = charset.body_encode(self._payload)
				325	if 'Content-Transfer-Encoding' not in self:
				326	cte = charset.get_body_encoding()
				327	try:
				328	cte(self)
				329	except TypeError:
				330	self._payload = charset.body_encode(self._payload)
				331	self.add_header('Content-Transfer-Encoding', cte)
				332
				333	def get_charset(self):
				334	"""Return the Charset instance associated with the message's payload.
				335	"""
				336	return self._charset
				337
				338	#
				339	# MAPPING INTERFACE (partial)
				340	#
				341	def __len__(self):
				342	"""Return the total number of headers, including duplicates."""
				343	return len(self._headers)
				344
				345	def __getitem__(self, name):
				346	"""Get a header value.
				347
				348	Return None if the header is missing instead of raising an exception.
				349
				350	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	351	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	352	the values matching a header field name.
				353	"""
				354	return self.get(name)
				355
				356	def __setitem__(self, name, val):
				357	"""Set the value of a header.
				358
				359	Note: this does not overwrite an existing header with the same field
				360	name. Use __delitem__() first to delete any existing headers.
				361	"""
				362	self._headers.append((name, val))
				363
				364	def __delitem__(self, name):
				365	"""Delete all occurrences of a header, if present.
				366
				367	Does not raise an exception if the header is missing.
				368	"""
				369	name = name.lower()
				370	newheaders = []
				371	for k, v in self._headers:
				372	if k.lower() != name:
				373	newheaders.append((k, v))
				374	self._headers = newheaders
				375
				376	def __contains__(self, name):
				377	return name.lower() in [k.lower() for k, v in self._headers]
				378
				379	def __iter__(self):
				380	for field, value in self._headers:
				381	yield field
				382
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	383	def keys(self):
				384	"""Return a list of all the message's header field names.
				385
				386	These will be sorted in the order they appeared in the original
				387	message, or were added to the message, and may contain duplicates.
				388	Any fields deleted and re-inserted are always appended to the header
				389	list.
				390	"""
				391	return [k for k, v in self._headers]
				392
				393	def values(self):
				394	"""Return a list of all the message's header values.
				395
				396	These will be sorted in the order they appeared in the original
				397	message, or were added to the message, and may contain duplicates.
				398	Any fields deleted and re-inserted are always appended to the header
				399	list.
				400	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	401	return [_sanitize_surrogates(v) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	402
				403	def items(self):
				404	"""Get all the message's header fields and values.
				405
				406	These will be sorted in the order they appeared in the original
				407	message, or were added to the message, and may contain duplicates.
				408	Any fields deleted and re-inserted are always appended to the header
				409	list.
				410	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	411	return [(k, _sanitize_surrogates(v)) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	412
				413	def get(self, name, failobj=None):
				414	"""Get a header value.
				415
				416	Like __getitem__() but return failobj instead of None when the field
				417	is missing.
				418	"""
				419	name = name.lower()
				420	for k, v in self._headers:
				421	if k.lower() == name:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	422	return _sanitize_surrogates(v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	423	return failobj
				424
				425	#
				426	# Additional useful stuff
				427	#
				428
				429	def get_all(self, name, failobj=None):
				430	"""Return a list of all the values for the named field.
				431
				432	These will be sorted in the order they appeared in the original
				433	message, and may contain duplicates. Any fields deleted and
				434	re-inserted are always appended to the header list.
				435
				436	If no such fields exist, failobj is returned (defaults to None).
				437	"""
				438	values = []
				439	name = name.lower()
				440	for k, v in self._headers:
				441	if k.lower() == name:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	442	values.append(_sanitize_surrogates(v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	443	if not values:
				444	return failobj
				445	return values
				446
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value its main value (left out
        when None).  Keyword arguments can be used to set additional
        parameters for the header field, with underscores converted to
        dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = [] if _value is None else [_value]
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            if val is None:
                parts.append(dashed)
            else:
                parts.append(_formatparam(dashed, val))
        self._headers.append((_name, SEMISPACE.join(parts)))
				476
				477	def replace_header(self, _name, _value):
				478	"""Replace a header.
				479
				480	Replace the first matching header found in the message, retaining
				481	header order and case. If no matching header was found, a KeyError is
				482	raised.
				483	"""
				484	_name = _name.lower()
				485	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				486	if k.lower() == _name:
				487	self._headers[i] = (k, _value)
				488	break
				489	else:
				490	raise KeyError(_name)
				491
				492	#
				493	# Use these three methods instead of the three above.
				494	#
				495
				496	def get_content_type(self):
				497	"""Return the message's content type.
				498
				499	The returned string is coerced to lower case of the form
				500	`maintype/subtype'. If there was no Content-Type header in the
				501	message, the default type as given by get_default_type() will be
				502	returned. Since according to RFC 2045, messages always have a default
				503	type this will always return a value.
				504
				505	RFC 2045 defines a message's default type to be text/plain unless it
				506	appears inside a multipart/digest container, in which case it would be
				507	message/rfc822.
				508	"""
				509	missing = object()
				510	value = self.get('content-type', missing)
				511	if value is missing:
				512	# This should have no parameters
				513	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	514	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	515	# RFC 2045, section 5.2 says if its invalid, use text/plain
				516	if ctype.count('/') != 1:
				517	return 'text/plain'
				518	return ctype
				519
				520	def get_content_maintype(self):
				521	"""Return the message's main content type.
				522
				523	This is the `maintype' part of the string returned by
				524	get_content_type().
				525	"""
				526	ctype = self.get_content_type()
				527	return ctype.split('/')[0]
				528
				529	def get_content_subtype(self):
				530	"""Returns the message's sub-content type.
				531
				532	This is the `subtype' part of the string returned by
				533	get_content_type().
				534	"""
				535	ctype = self.get_content_type()
				536	return ctype.split('/')[1]
				537
				538	def get_default_type(self):
				539	"""Return the `default' content type.
				540
				541	Most messages have a default content type of text/plain, except for
				542	messages that are subparts of multipart/digest containers. Such
				543	subparts have a default content type of message/rfc822.
				544	"""
				545	return self._default_type
				546
				547	def set_default_type(self, ctype):
				548	"""Set the `default' content type.
				549
				550	ctype should be either "text/plain" or "message/rfc822", although this
				551	is not enforced. The default content type is not stored in the
				552	Content-Type header.
				553	"""
				554	self._default_type = ctype
				555
				556	def _get_params_preserve(self, failobj, header):
				557	# Like get_params() but preserves the quoting of values. BAW:
				558	# should this be part of the public interface?
				559	missing = object()
				560	value = self.get(header, missing)
				561	if value is missing:
				562	return failobj
				563	params = []
				564	for p in _parseparam(';' + value):
				565	try:
				566	name, val = p.split('=', 1)
				567	name = name.strip()
				568	val = val.strip()
				569	except ValueError:
				570	# Must have been a bare attribute
				571	name = p.strip()
				572	val = ''
				573	params.append((name, val))
				574	params = utils.decode_params(params)
				575	return params
				576
				577	def get_params(self, failobj=None, header='content-type', unquote=True):
				578	"""Return the message's Content-Type parameters, as a list.
				579
				580	The elements of the returned list are 2-tuples of key/value pairs, as
				581	split on the `=' sign. The left hand side of the `=' is the key,
				582	while the right hand side is the value. If there is no `=' sign in
				583	the parameter the value is the empty string. The value is as
				584	described in the get_param() method.
				585
				586	Optional failobj is the object to return if there is no Content-Type
				587	header. Optional header is the header to search instead of
				588	Content-Type. If unquote is True, the value is unquoted.
				589	"""
				590	missing = object()
				591	params = self._get_params_preserve(missing, header)
				592	if params is missing:
				593	return failobj
				594	if unquote:
				595	return [(k, _unquotevalue(v)) for k, v in params]
				596	else:
				597	return params
				598
				599	def get_param(self, param, failobj=None, header='content-type',
				600	unquote=True):
				601	"""Return the parameter value if found in the Content-Type header.
				602
				603	Optional failobj is the object to return if there is no Content-Type
				604	header, or the Content-Type header has no such parameter. Optional
				605	header is the header to search instead of Content-Type.
				606
				607	Parameter keys are always compared case insensitively. The return
				608	value can either be a string, or a 3-tuple if the parameter was RFC
				609	2231 encoded. When it's a 3-tuple, the elements of the value are of
				610	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				611	LANGUAGE can be None, in which case you should consider VALUE to be
				612	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				613
				614	Your application should be prepared to deal with 3-tuple return
				615	values, and can convert the parameter to a Unicode string like so:
				616
				617	param = msg.get_param('foo')
				618	if isinstance(param, tuple):
				619	param = unicode(param[2], param[0] or 'us-ascii')
				620
				621	In any case, the parameter value (either the returned string, or the
				622	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				623	to False.
				624	"""
				625	if header not in self:
				626	return failobj
				627	for k, v in self._get_params_preserve(failobj, header):
				628	if k.lower() == param.lower():
				629	if unquote:
				630	return _unquotevalue(v)
				631	else:
				632	return v
				633	return failobj
				634
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if charset and not isinstance(value, tuple):
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)

        if self.get_param(param, header=header):
            # The parameter is already there: rebuild the whole header
            # value, swapping in the new value at the parameter's position.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    piece = _formatparam(param, value, requote)
                else:
                    piece = _formatparam(old_param, old_value, requote)
                ctype = piece if not ctype else SEMISPACE.join([ctype, piece])
        else:
            # Not there (or empty): append it to whatever the header holds.
            piece = _formatparam(param, value, requote)
            ctype = piece if not ctype else SEMISPACE.join([ctype, piece])

        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
				682
				683	def del_param(self, param, header='content-type', requote=True):
				684	"""Remove the given parameter completely from the Content-Type header.
				685
				686	The header will be re-written in place without the parameter or its
				687	value. All values will be quoted as necessary unless requote is
				688	False. Optional header specifies an alternative to the Content-Type
				689	header.
				690	"""
				691	if header not in self:
				692	return
				693	new_ctype = ''
				694	for p, v in self.get_params(header=header, unquote=requote):
				695	if p.lower() != param.lower():
				696	if not new_ctype:
				697	new_ctype = _formatparam(p, v, requote)
				698	else:
				699	new_ctype = SEMISPACE.join([new_ctype,
				700	_formatparam(p, v, requote)])
				701	if new_ctype != self.get(header):
				702	del self[header]
				703	self[header] = new_ctype
				704
				705	def set_type(self, type, header='Content-Type', requote=True):
				706	"""Set the main type and subtype for the Content-Type header.
				707
				708	type must be a string in the form "maintype/subtype", otherwise a
				709	ValueError is raised.
				710
				711	This method replaces the Content-Type header, keeping all the
				712	parameters in place. If requote is False, this leaves the existing
				713	header's quoting as is. Otherwise, the parameters will be quoted (the
				714	default).
				715
				716	An alternative header can be specified in the header argument. When
				717	the Content-Type header is set, we'll always also add a MIME-Version
				718	header.
				719	"""
				720	# BAW: should we be strict?
				721	if not type.count('/') == 1:
				722	raise ValueError
				723	# Set the Content-Type, you get a MIME-Version
				724	if header.lower() == 'content-type':
				725	del self['mime-version']
				726	self['MIME-Version'] = '1.0'
				727	if header not in self:
				728	self[header] = type
				729	return
				730	params = self.get_params(header=header, unquote=requote)
				731	del self[header]
				732	self[header] = type
				733	# Skip the first param; it's the old type.
				734	for p, v in params[1:]:
				735	self.set_param(p, v, header, requote)
				736
				737	def get_filename(self, failobj=None):
				738	"""Return the filename associated with the payload if present.
				739
				740	The filename is extracted from the Content-Disposition header's
				741	`filename' parameter, and it is unquoted. If that header is missing
				742	the `filename' parameter, this method falls back to looking for the
				743	`name' parameter.
				744	"""
				745	missing = object()
				746	filename = self.get_param('filename', missing, 'content-disposition')
				747	if filename is missing:
R. David Murray	bf2e0aa	2009-10-10 00:13:32 +0000	[diff] [blame]	748	filename = self.get_param('name', missing, 'content-type')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	749	if filename is missing:
				750	return failobj
				751	return utils.collapse_rfc2231_value(filename).strip()
				752
				753	def get_boundary(self, failobj=None):
				754	"""Return the boundary associated with the payload if present.
				755
				756	The boundary is extracted from the Content-Type header's `boundary'
				757	parameter, and it is unquoted.
				758	"""
				759	missing = object()
				760	boundary = self.get_param('boundary', missing)
				761	if boundary is missing:
				762	return failobj
				763	# RFC 2046 says that boundaries may begin but not end in w/s
				764	return utils.collapse_rfc2231_value(boundary).rstrip()
				765
				766	def set_boundary(self, boundary):
				767	"""Set the boundary parameter in Content-Type to 'boundary'.
				768
				769	This is subtly different than deleting the Content-Type header and
				770	adding a new one with a new boundary parameter via add_header(). The
				771	main difference is that using the set_boundary() method preserves the
				772	order of the Content-Type header in the original message.
				773
				774	HeaderParseError is raised if the message has no Content-Type header.
				775	"""
				776	missing = object()
				777	params = self._get_params_preserve(missing, 'content-type')
				778	if params is missing:
				779	# There was no Content-Type header, and we don't know what type
				780	# to set it to, so raise an exception.
				781	raise errors.HeaderParseError('No Content-Type header found')
				782	newparams = []
				783	foundp = False
				784	for pk, pv in params:
				785	if pk.lower() == 'boundary':
				786	newparams.append(('boundary', '"%s"' % boundary))
				787	foundp = True
				788	else:
				789	newparams.append((pk, pv))
				790	if not foundp:
				791	# The original Content-Type header had no boundary attribute.
				792	# Tack one on the end. BAW: should we raise an exception
				793	# instead???
				794	newparams.append(('boundary', '"%s"' % boundary))
				795	# Replace the existing Content-Type header with the new value
				796	newheaders = []
				797	for h, v in self._headers:
				798	if h.lower() == 'content-type':
				799	parts = []
				800	for k, v in newparams:
				801	if v == '':
				802	parts.append(k)
				803	else:
				804	parts.append('%s=%s' % (k, v))
				805	newheaders.append((h, SEMISPACE.join(parts)))
				806
				807	else:
				808	newheaders.append((h, v))
				809	self._headers = newheaders
				810
				811	def get_content_charset(self, failobj=None):
				812	"""Return the charset parameter of the Content-Type header.
				813
				814	The returned string is always coerced to lower case. If there is no
				815	Content-Type header, or if that header has no charset parameter,
				816	failobj is returned.
				817	"""
				818	missing = object()
				819	charset = self.get_param('charset', missing)
				820	if charset is missing:
				821	return failobj
				822	if isinstance(charset, tuple):
				823	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				824	pcharset = charset[0] or 'us-ascii'
				825	try:
				826	# LookupError will be raised if the charset isn't known to
				827	# Python. UnicodeError will be raised if the encoded text
				828	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	829	as_bytes = charset[2].encode('raw-unicode-escape')
				830	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	831	except (LookupError, UnicodeError):
				832	charset = charset[2]
				833	# charset characters must be in us-ascii range
				834	try:
				835	charset.encode('us-ascii')
				836	except UnicodeError:
				837	return failobj
				838	# RFC 2046, $4.1.2 says charsets are not case sensitive
				839	return charset.lower()
				840
				841	def get_charsets(self, failobj=None):
				842	"""Return a list containing the charset(s) used in this message.
				843
				844	The returned list of items describes the Content-Type headers'
				845	charset parameter for this message and all the subparts in its
				846	payload.
				847
				848	Each item will either be a string (the value of the charset parameter
				849	in the Content-Type header of that part) or the value of the
				850	'failobj' parameter (defaults to None), if the part does not have a
				851	main MIME type of "text", or the charset is not defined.
				852
				853	The list will contain one string for each part of the message, plus
				854	one for the container message (i.e. self), so that a non-multipart
				855	message will still return a list of length 1.
				856	"""
				857	return [part.get_content_charset(failobj) for part in self.walk()]
				858
				859	# I.e. def walk(self): ...
				860	from email.iterators import walk