Blame - Lib/email/message.py - platform/external/python/cpython3

blob: b5f7b3a957cc844ac2bce40998d1a27e790b4f64 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	13	from io import BytesIO, StringIO
				14
				15	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	16	from email import utils
				17	from email import errors
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	18	from email._policybase import compat32
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	19	from email import charset as _charset
R David Murray	80e0aee	2012-05-27 21:23:34 -0400	[diff] [blame]	20	from email._encoded_words import decode_b
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	21	Charset = _charset.Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	22
				23	SEMISPACE = '; '
				24
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	25	# Regular expression that matches `special' characters in parameters, the
Mark Dickinson	934896d	2009-02-21 20:59:32 +0000	[diff] [blame]	26	# existence of which forces quoting of the parameter value.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	27	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				28
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	29
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple ``(main_value, rest)`` with surrounding whitespace
    stripped from both parts.  ``rest`` is None when the value contains no
    semicolon at all.

    This is deliberately simple and not strictly RFC 2045 (section 5.1)
    compliant, but it copes with most headers found in the wild.
    """
    # The argument may be a Header instance rather than a str, so it is
    # stringified before being split.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
				39
def _formatparam(param, value=None, quote=True):
    """Format a header parameter as ``key=value`` text.

    param is the parameter name.  When value is None or empty, the bare
    parameter name is returned.

    A tuple value is taken to be ``(charset, language, text)`` and is
    encoded following RFC 2231; the name gets a trailing ``*`` and the
    result is never quoted.  A string value that cannot be encoded as ASCII
    is treated the same way, using the utf-8 charset and an empty language.

    Any other value is wrapped in double quotes when quote is true or when
    it contains one of the ``tspecials`` characters; otherwise it is emitted
    unquoted.
    """
    if value is None or len(value) == 0:
        return param

    if isinstance(value, tuple):
        # (charset, language, text): charset is a plain string here, not a
        # Charset instance.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)

    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)

    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
				73
				74	def _parseparam(s):
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	75	# RDM This might be a Header, so for now stringify it.
				76	s = ';' + str(s)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	77	plist = []
				78	while s[:1] == ';':
				79	s = s[1:]
				80	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	81	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	82	end = s.find(';', end + 1)
				83	if end < 0:
				84	end = len(s)
				85	f = s[:end]
				86	if '=' in f:
				87	i = f.index('=')
				88	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				89	plist.append(f.strip())
				90	s = s[end:]
				91	return plist
				92
				93
				94	def _unquotevalue(value):
				95	# This is different than utils.collapse_rfc2231_value() because it doesn't
				96	# try to convert the value to a unicode. Message.get_param() and
				97	# Message.get_params() are both currently defined to return the tuple in
				98	# the face of RFC 2231 parameters.
				99	if isinstance(value, tuple):
				100	return value[0], value[1], utils.unquote(value[2])
				101	else:
				102	return utils.unquote(value)
				103
				104
				105
				106	class Message:
				107	"""Basic message object.
				108
				109	A message object is defined as something that has a bunch of RFC 2822
				110	headers and a payload. It may optionally have an envelope header
				111	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				112	multipart or a message/rfc822), then the payload is a list of Message
				113	objects, otherwise it is a string.
				114
				115	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	116	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	117	do in fact appear multiple times (e.g. Received) and for those headers,
				118	you must use the explicit API to set or get all the headers. Not all of
				119	the mapping methods are implemented.
				120	"""
def __init__(self, policy=compat32):
    """Create an empty message governed by policy.

    The new message has no headers, no payload, no Unix From_ line and no
    charset; its default content type is text/plain.
    """
    self.policy = policy
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
				132
				133	def __str__(self):
				134	"""Return the entire formatted message as a string.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	135	"""
				136	return self.as_string()
				137
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Serialize the message, headers and payload, into a str.

    unixfrom, when true, asks for the Unix From_ envelope header to be
    included.  maxheaderlen is handed to the Generator; for backward
    compatibility it defaults to 0, so pass it explicitly if you want a
    different value.  policy is handed to the Generator used to serialize
    the message; when it is None the policy stored on the message is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
				161
				162	def __bytes__(self):
				163	"""Return the entire formatted message as a bytes object.
				164	"""
				165	return self.as_bytes()
				166
				167	def as_bytes(self, unixfrom=False, policy=None):
				168	"""Return the entire formatted message as a bytes object.
				169
				170	Optional 'unixfrom', when true, means include the Unix From_ envelope
				171	header. 'policy' is passed to the BytesGenerator instance used to
				172	serialize the message; if not specified the policy associated with
				173	the message instance is used.
				174	"""
				175	from email.generator import BytesGenerator
				176	policy = self.policy if policy is None else policy
				177	fp = BytesIO()
				178	g = BytesGenerator(fp, mangle_from_=False, policy=policy)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	179	g.flatten(self, unixfrom=unixfrom)
				180	return fp.getvalue()
				181
				182	def is_multipart(self):
				183	"""Return True if the message consists of multiple parts."""
				184	return isinstance(self._payload, list)
				185
				186	#
				187	# Unix From_ line
				188	#
				189	def set_unixfrom(self, unixfrom):
				190	self._unixfrom = unixfrom
				191
				192	def get_unixfrom(self):
				193	return self._unixfrom
				194
				195	#
				196	# Payload manipulation.
				197	#
				198	def attach(self, payload):
				199	"""Add the given payload to the current payload.
				200
				201	The current payload will always be a list of objects after this method
				202	is called. If you want to set the payload to a scalar object, use
				203	set_payload() instead.
				204	"""
				205	if self._payload is None:
				206	self._payload = [payload]
				207	else:
				208	self._payload.append(payload)
				209
				210	def get_payload(self, i=None, decode=False):
				211	"""Return a reference to the payload.
				212
				213	The payload will either be a list object or a string. If you mutate
				214	the list object, you modify the message's payload in place. Optional
				215	i returns that index into the payload.
				216
				217	Optional decode is a flag indicating whether the payload should be
				218	decoded or not, according to the Content-Transfer-Encoding header
				219	(default is False).
				220
				221	When True and the message is not a multipart, the payload will be
				222	decoded if this header's value is `quoted-printable' or `base64'. If
				223	some other encoding is used, or the header is missing, or if the
				224	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				225	payload is returned as-is.
				226
				227	If the message is a multipart and the decode flag is True, then None
				228	is returned.
				229	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	230	# Here is the logic table for this code, based on the email5.0.0 code:
				231	# i decode is_multipart result
				232	# ------ ------ ------------ ------------------------------
				233	# None True True None
				234	# i True True None
				235	# None False True _payload (a list)
				236	# i False True _payload element i (a Message)
				237	# i False False error (not a list)
				238	# i True False error (not a list)
				239	# None False False _payload
				240	# None True False _payload decoded (bytes)
				241	# Note that Barry planned to factor out the 'decode' case, but that
				242	# isn't so easy now that we handle the 8 bit data, which needs to be
				243	# converted in both the decode and non-decode path.
				244	if self.is_multipart():
				245	if decode:
				246	return None
				247	if i is None:
				248	return self._payload
				249	else:
				250	return self._payload[i]
				251	# For backward compatibility, Use isinstance and this error message
				252	# instead of the more logical is_multipart test.
				253	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	254	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	255	payload = self._payload
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	256	# cte might be a Header, so for now stringify it.
				257	cte = str(self.get('content-transfer-encoding', '')).lower()
R David Murray	106f8e3	2011-03-15 12:48:41 -0400	[diff] [blame]	258	# payload may be bytes here.
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	259	if isinstance(payload, str):
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	260	if utils._has_surrogates(payload):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	261	bpayload = payload.encode('ascii', 'surrogateescape')
				262	if not decode:
				263	try:
				264	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				265	except LookupError:
				266	payload = bpayload.decode('ascii', 'replace')
				267	elif decode:
				268	try:
				269	bpayload = payload.encode('ascii')
				270	except UnicodeError:
				271	# This won't happen for RFC compliant messages (messages
				272	# containing only ASCII codepoints in the unicode input).
				273	# If it does happen, turn the string into bytes in a way
				274	# guaranteed not to fail.
				275	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	276	if not decode:
				277	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	278	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	279	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	280	elif cte == 'base64':
R David Murray	80e0aee	2012-05-27 21:23:34 -0400	[diff] [blame]	281	# XXX: this is a bit of a hack; decode_b should probably be factored
				282	# out somewhere, but I haven't figured out where yet.
				283	value, defects = decode_b(b''.join(bpayload.splitlines()))
				284	for defect in defects:
				285	self.policy.handle_defect(self, defect)
				286	return value
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	287	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	288	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	289	out_file = BytesIO()
				290	try:
				291	uu.decode(in_file, out_file, quiet=True)
				292	return out_file.getvalue()
				293	except uu.Error:
				294	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	295	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	296	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	297	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	298	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	299
				300	def set_payload(self, payload, charset=None):
				301	"""Set the payload to the given value.
				302
				303	Optional charset sets the message's default character set. See
				304	set_charset() for details.
				305	"""
				306	self._payload = payload
				307	if charset is not None:
				308	self.set_charset(charset)
				309
				310	def set_charset(self, charset):
				311	"""Set the charset of the payload to a given character set.
				312
				313	charset can be a Charset instance, a string naming a character set, or
				314	None. If it is a string it will be converted to a Charset instance.
				315	If charset is None, the charset parameter will be removed from the
				316	Content-Type field. Anything else will generate a TypeError.
				317
				318	The message will be assumed to be of type text/* encoded with
				319	charset.input_charset. It will be converted to charset.output_charset
				320	and encoded properly, if needed, when generating the plain text
				321	representation of the message. MIME headers (MIME-Version,
				322	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	323	"""
				324	if charset is None:
				325	self.del_param('charset')
				326	self._charset = None
				327	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	328	if not isinstance(charset, Charset):
				329	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	330	self._charset = charset
				331	if 'MIME-Version' not in self:
				332	self.add_header('MIME-Version', '1.0')
				333	if 'Content-Type' not in self:
				334	self.add_header('Content-Type', 'text/plain',
				335	charset=charset.get_output_charset())
				336	else:
				337	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	338	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	339	self._payload = charset.body_encode(self._payload)
				340	if 'Content-Transfer-Encoding' not in self:
				341	cte = charset.get_body_encoding()
				342	try:
				343	cte(self)
				344	except TypeError:
				345	self._payload = charset.body_encode(self._payload)
				346	self.add_header('Content-Transfer-Encoding', cte)
				347
				348	def get_charset(self):
				349	"""Return the Charset instance associated with the message's payload.
				350	"""
				351	return self._charset
				352
				353	#
				354	# MAPPING INTERFACE (partial)
				355	#
				356	def __len__(self):
				357	"""Return the total number of headers, including duplicates."""
				358	return len(self._headers)
				359
				360	def __getitem__(self, name):
				361	"""Get a header value.
				362
				363	Return None if the header is missing instead of raising an exception.
				364
				365	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	366	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	367	the values matching a header field name.
				368	"""
				369	return self.get(name)
				370
				371	def __setitem__(self, name, val):
				372	"""Set the value of a header.
				373
				374	Note: this does not overwrite an existing header with the same field
				375	name. Use __delitem__() first to delete any existing headers.
				376	"""
R David Murray	abfc374	2012-05-29 09:14:44 -0400	[diff] [blame]	377	max_count = self.policy.header_max_count(name)
				378	if max_count:
				379	lname = name.lower()
				380	found = 0
				381	for k, v in self._headers:
				382	if k.lower() == lname:
				383	found += 1
				384	if found >= max_count:
				385	raise ValueError("There may be at most {} {} headers "
				386	"in a message".format(max_count, name))
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	387	self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	388
				389	def __delitem__(self, name):
				390	"""Delete all occurrences of a header, if present.
				391
				392	Does not raise an exception if the header is missing.
				393	"""
				394	name = name.lower()
				395	newheaders = []
				396	for k, v in self._headers:
				397	if k.lower() != name:
				398	newheaders.append((k, v))
				399	self._headers = newheaders
				400
				401	def __contains__(self, name):
				402	return name.lower() in [k.lower() for k, v in self._headers]
				403
				404	def __iter__(self):
				405	for field, value in self._headers:
				406	yield field
				407
def keys(self):
    """Return a list of every header field name in the message.

    Names are listed in the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  A field that
    is deleted and re-inserted is always appended to the header list.
    """
    names = []
    for field, _ in self._headers:
        names.append(field)
    return names
				417
				418	def values(self):
				419	"""Return a list of all the message's header values.
				420
				421	These will be sorted in the order they appeared in the original
				422	message, or were added to the message, and may contain duplicates.
				423	Any fields deleted and re-inserted are always appended to the header
				424	list.
				425	"""
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	426	return [self.policy.header_fetch_parse(k, v)
				427	for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	428
				429	def items(self):
				430	"""Get all the message's header fields and values.
				431
				432	These will be sorted in the order they appeared in the original
				433	message, or were added to the message, and may contain duplicates.
				434	Any fields deleted and re-inserted are always appended to the header
				435	list.
				436	"""
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	437	return [(k, self.policy.header_fetch_parse(k, v))
				438	for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	439
				440	def get(self, name, failobj=None):
				441	"""Get a header value.
				442
				443	Like __getitem__() but return failobj instead of None when the field
				444	is missing.
				445	"""
				446	name = name.lower()
				447	for k, v in self._headers:
				448	if k.lower() == name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	449	return self.policy.header_fetch_parse(k, v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	450	return failobj
				451
				452	#
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	453	# "Internal" methods (public API, but only intended for use by a parser
				454	# or generator, not normal application code.)
				455	#
				456
				457	def set_raw(self, name, value):
				458	"""Store name and value in the model without modification.
				459
				460	This is an "internal" API, intended only for use by a parser.
				461	"""
				462	self._headers.append((name, value))
				463
				464	def raw_items(self):
				465	"""Return the (name, value) header pairs without modification.
				466
				467	This is an "internal" API, intended only for use by a generator.
				468	"""
				469	return iter(self._headers.copy())
				470
				471	#
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	472	# Additional useful stuff
				473	#
				474
				475	def get_all(self, name, failobj=None):
				476	"""Return a list of all the values for the named field.
				477
				478	These will be sorted in the order they appeared in the original
				479	message, and may contain duplicates. Any fields deleted and
				480	re-inserted are always appended to the header list.
				481
				482	If no such fields exist, failobj is returned (defaults to None).
				483	"""
				484	values = []
				485	name = name.lower()
				486	for k, v in self._headers:
				487	if k.lower() == name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	488	values.append(self.policy.header_fetch_parse(k, v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	489	if not values:
				490	return failobj
				491	return values
				492
				493	def add_header(self, _name, _value, **_params):
				494	"""Extended header setting.
				495
				496	name is the header field to add. keyword arguments can be used to set
				497	additional parameters for the header field, with underscores converted
				498	to dashes. Normally the parameter will be added as key="value" unless
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	499	value is None, in which case only the key will be added. If a
				500	parameter value contains non-ASCII characters it can be specified as a
				501	three-tuple of (charset, language, value), in which case it will be
				502	encoded according to RFC2231 rules. Otherwise it will be encoded using
				503	the utf-8 charset and a language of ''.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	504
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	505	Examples:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	506
				507	msg.add_header('content-disposition', 'attachment', filename='bud.gif')
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	508	msg.add_header('content-disposition', 'attachment',
				509	filename=('utf-8', '', Fußballer.ppt'))
				510	msg.add_header('content-disposition', 'attachment',
				511	filename='Fußballer.ppt'))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	512	"""
				513	parts = []
				514	for k, v in _params.items():
				515	if v is None:
				516	parts.append(k.replace('_', '-'))
				517	else:
				518	parts.append(_formatparam(k.replace('_', '-'), v))
				519	if _value is not None:
				520	parts.insert(0, _value)
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	521	self[_name] = SEMISPACE.join(parts)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	522
				523	def replace_header(self, _name, _value):
				524	"""Replace a header.
				525
				526	Replace the first matching header found in the message, retaining
				527	header order and case. If no matching header was found, a KeyError is
				528	raised.
				529	"""
				530	_name = _name.lower()
				531	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				532	if k.lower() == _name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	533	self._headers[i] = self.policy.header_store_parse(k, _value)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	534	break
				535	else:
				536	raise KeyError(_name)
				537
				538	#
				539	# Use these three methods instead of the three above.
				540	#
				541
				542	def get_content_type(self):
				543	"""Return the message's content type.
				544
				545	The returned string is coerced to lower case of the form
				546	`maintype/subtype'. If there was no Content-Type header in the
				547	message, the default type as given by get_default_type() will be
				548	returned. Since according to RFC 2045, messages always have a default
				549	type this will always return a value.
				550
				551	RFC 2045 defines a message's default type to be text/plain unless it
				552	appears inside a multipart/digest container, in which case it would be
				553	message/rfc822.
				554	"""
				555	missing = object()
				556	value = self.get('content-type', missing)
				557	if value is missing:
				558	# This should have no parameters
				559	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	560	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	561	# RFC 2045, section 5.2 says if its invalid, use text/plain
				562	if ctype.count('/') != 1:
				563	return 'text/plain'
				564	return ctype
				565
				566	def get_content_maintype(self):
				567	"""Return the message's main content type.
				568
				569	This is the `maintype' part of the string returned by
				570	get_content_type().
				571	"""
				572	ctype = self.get_content_type()
				573	return ctype.split('/')[0]
				574
				575	def get_content_subtype(self):
				576	"""Returns the message's sub-content type.
				577
				578	This is the `subtype' part of the string returned by
				579	get_content_type().
				580	"""
				581	ctype = self.get_content_type()
				582	return ctype.split('/')[1]
				583
				584	def get_default_type(self):
				585	"""Return the `default' content type.
				586
				587	Most messages have a default content type of text/plain, except for
				588	messages that are subparts of multipart/digest containers. Such
				589	subparts have a default content type of message/rfc822.
				590	"""
				591	return self._default_type
				592
				593	def set_default_type(self, ctype):
				594	"""Set the `default' content type.
				595
				596	ctype should be either "text/plain" or "message/rfc822", although this
				597	is not enforced. The default content type is not stored in the
				598	Content-Type header.
				599	"""
				600	self._default_type = ctype
				601
				602	def _get_params_preserve(self, failobj, header):
				603	# Like get_params() but preserves the quoting of values. BAW:
				604	# should this be part of the public interface?
				605	missing = object()
				606	value = self.get(header, missing)
				607	if value is missing:
				608	return failobj
				609	params = []
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	610	for p in _parseparam(value):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	611	try:
				612	name, val = p.split('=', 1)
				613	name = name.strip()
				614	val = val.strip()
				615	except ValueError:
				616	# Must have been a bare attribute
				617	name = p.strip()
				618	val = ''
				619	params.append((name, val))
				620	params = utils.decode_params(params)
				621	return params
				622
				623	def get_params(self, failobj=None, header='content-type', unquote=True):
				624	"""Return the message's Content-Type parameters, as a list.
				625
				626	The elements of the returned list are 2-tuples of key/value pairs, as
				627	split on the `=' sign. The left hand side of the `=' is the key,
				628	while the right hand side is the value. If there is no `=' sign in
				629	the parameter the value is the empty string. The value is as
				630	described in the get_param() method.
				631
				632	Optional failobj is the object to return if there is no Content-Type
				633	header. Optional header is the header to search instead of
				634	Content-Type. If unquote is True, the value is unquoted.
				635	"""
				636	missing = object()
				637	params = self._get_params_preserve(missing, header)
				638	if params is missing:
				639	return failobj
				640	if unquote:
				641	return [(k, _unquotevalue(v)) for k, v in params]
				642	else:
				643	return params
				644
				645	def get_param(self, param, failobj=None, header='content-type',
				646	unquote=True):
				647	"""Return the parameter value if found in the Content-Type header.
				648
				649	Optional failobj is the object to return if there is no Content-Type
				650	header, or the Content-Type header has no such parameter. Optional
				651	header is the header to search instead of Content-Type.
				652
				653	Parameter keys are always compared case insensitively. The return
				654	value can either be a string, or a 3-tuple if the parameter was RFC
				655	2231 encoded. When it's a 3-tuple, the elements of the value are of
				656	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				657	LANGUAGE can be None, in which case you should consider VALUE to be
				658	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame]	659	The parameter value (either the returned string, or the VALUE item in
				660	the 3-tuple) is always unquoted, unless unquote is set to False.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	661
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame]	662	If your application doesn't care whether the parameter was RFC 2231
				663	encoded, it can turn the return value into a string as follows:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	664
				665	param = msg.get_param('foo')
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame]	666	param = email.utils.collapse_rfc2231_value(rawparam)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	667
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	668	"""
				669	if header not in self:
				670	return failobj
				671	for k, v in self._get_params_preserve(failobj, header):
				672	if k.lower() == param.lower():
				673	if unquote:
				674	return _unquotevalue(v)
				675	else:
				676	return v
				677	return failobj
				678
				679	def set_param(self, param, value, header='Content-Type', requote=True,
				680	charset=None, language=''):
				681	"""Set a parameter in the Content-Type header.
				682
				683	If the parameter already exists in the header, its value will be
				684	replaced with the new value.
				685
				686	If header is Content-Type and has not yet been defined for this
				687	message, it will be set to "text/plain" and the new parameter and
				688	value will be appended as per RFC 2045.
				689
				690	An alternate header can specified in the header argument, and all
				691	parameters will be quoted as necessary unless requote is False.
				692
				693	If charset is specified, the parameter will be encoded according to RFC
				694	2231. Optional language specifies the RFC 2231 language, defaulting
				695	to the empty string. Both charset and language should be strings.
				696	"""
				697	if not isinstance(value, tuple) and charset:
				698	value = (charset, language, value)
				699
				700	if header not in self and header.lower() == 'content-type':
				701	ctype = 'text/plain'
				702	else:
				703	ctype = self.get(header)
				704	if not self.get_param(param, header=header):
				705	if not ctype:
				706	ctype = _formatparam(param, value, requote)
				707	else:
				708	ctype = SEMISPACE.join(
				709	[ctype, _formatparam(param, value, requote)])
				710	else:
				711	ctype = ''
				712	for old_param, old_value in self.get_params(header=header,
				713	unquote=requote):
				714	append_param = ''
				715	if old_param.lower() == param.lower():
				716	append_param = _formatparam(param, value, requote)
				717	else:
				718	append_param = _formatparam(old_param, old_value, requote)
				719	if not ctype:
				720	ctype = append_param
				721	else:
				722	ctype = SEMISPACE.join([ctype, append_param])
				723	if ctype != self.get(header):
				724	del self[header]
				725	self[header] = ctype
				726
				727	def del_param(self, param, header='content-type', requote=True):
				728	"""Remove the given parameter completely from the Content-Type header.
				729
				730	The header will be re-written in place without the parameter or its
				731	value. All values will be quoted as necessary unless requote is
				732	False. Optional header specifies an alternative to the Content-Type
				733	header.
				734	"""
				735	if header not in self:
				736	return
				737	new_ctype = ''
				738	for p, v in self.get_params(header=header, unquote=requote):
				739	if p.lower() != param.lower():
				740	if not new_ctype:
				741	new_ctype = _formatparam(p, v, requote)
				742	else:
				743	new_ctype = SEMISPACE.join([new_ctype,
				744	_formatparam(p, v, requote)])
				745	if new_ctype != self.get(header):
				746	del self[header]
				747	self[header] = new_ctype
				748
				749	def set_type(self, type, header='Content-Type', requote=True):
				750	"""Set the main type and subtype for the Content-Type header.
				751
				752	type must be a string in the form "maintype/subtype", otherwise a
				753	ValueError is raised.
				754
				755	This method replaces the Content-Type header, keeping all the
				756	parameters in place. If requote is False, this leaves the existing
				757	header's quoting as is. Otherwise, the parameters will be quoted (the
				758	default).
				759
				760	An alternative header can be specified in the header argument. When
				761	the Content-Type header is set, we'll always also add a MIME-Version
				762	header.
				763	"""
				764	# BAW: should we be strict?
				765	if not type.count('/') == 1:
				766	raise ValueError
				767	# Set the Content-Type, you get a MIME-Version
				768	if header.lower() == 'content-type':
				769	del self['mime-version']
				770	self['MIME-Version'] = '1.0'
				771	if header not in self:
				772	self[header] = type
				773	return
				774	params = self.get_params(header=header, unquote=requote)
				775	del self[header]
				776	self[header] = type
				777	# Skip the first param; it's the old type.
				778	for p, v in params[1:]:
				779	self.set_param(p, v, header, requote)
				780
				781	def get_filename(self, failobj=None):
				782	"""Return the filename associated with the payload if present.
				783
				784	The filename is extracted from the Content-Disposition header's
				785	`filename' parameter, and it is unquoted. If that header is missing
				786	the `filename' parameter, this method falls back to looking for the
				787	`name' parameter.
				788	"""
				789	missing = object()
				790	filename = self.get_param('filename', missing, 'content-disposition')
				791	if filename is missing:
R. David Murray	bf2e0aa	2009-10-10 00:13:32 +0000	[diff] [blame]	792	filename = self.get_param('name', missing, 'content-type')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	793	if filename is missing:
				794	return failobj
				795	return utils.collapse_rfc2231_value(filename).strip()
				796
				797	def get_boundary(self, failobj=None):
				798	"""Return the boundary associated with the payload if present.
				799
				800	The boundary is extracted from the Content-Type header's `boundary'
				801	parameter, and it is unquoted.
				802	"""
				803	missing = object()
				804	boundary = self.get_param('boundary', missing)
				805	if boundary is missing:
				806	return failobj
				807	# RFC 2046 says that boundaries may begin but not end in w/s
				808	return utils.collapse_rfc2231_value(boundary).rstrip()
				809
				810	def set_boundary(self, boundary):
				811	"""Set the boundary parameter in Content-Type to 'boundary'.
				812
				813	This is subtly different than deleting the Content-Type header and
				814	adding a new one with a new boundary parameter via add_header(). The
				815	main difference is that using the set_boundary() method preserves the
				816	order of the Content-Type header in the original message.
				817
				818	HeaderParseError is raised if the message has no Content-Type header.
				819	"""
				820	missing = object()
				821	params = self._get_params_preserve(missing, 'content-type')
				822	if params is missing:
				823	# There was no Content-Type header, and we don't know what type
				824	# to set it to, so raise an exception.
				825	raise errors.HeaderParseError('No Content-Type header found')
				826	newparams = []
				827	foundp = False
				828	for pk, pv in params:
				829	if pk.lower() == 'boundary':
				830	newparams.append(('boundary', '"%s"' % boundary))
				831	foundp = True
				832	else:
				833	newparams.append((pk, pv))
				834	if not foundp:
				835	# The original Content-Type header had no boundary attribute.
				836	# Tack one on the end. BAW: should we raise an exception
				837	# instead???
				838	newparams.append(('boundary', '"%s"' % boundary))
				839	# Replace the existing Content-Type header with the new value
				840	newheaders = []
				841	for h, v in self._headers:
				842	if h.lower() == 'content-type':
				843	parts = []
				844	for k, v in newparams:
				845	if v == '':
				846	parts.append(k)
				847	else:
				848	parts.append('%s=%s' % (k, v))
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	849	val = SEMISPACE.join(parts)
				850	newheaders.append(self.policy.header_store_parse(h, val))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	851
				852	else:
				853	newheaders.append((h, v))
				854	self._headers = newheaders
				855
				856	def get_content_charset(self, failobj=None):
				857	"""Return the charset parameter of the Content-Type header.
				858
				859	The returned string is always coerced to lower case. If there is no
				860	Content-Type header, or if that header has no charset parameter,
				861	failobj is returned.
				862	"""
				863	missing = object()
				864	charset = self.get_param('charset', missing)
				865	if charset is missing:
				866	return failobj
				867	if isinstance(charset, tuple):
				868	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				869	pcharset = charset[0] or 'us-ascii'
				870	try:
				871	# LookupError will be raised if the charset isn't known to
				872	# Python. UnicodeError will be raised if the encoded text
				873	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	874	as_bytes = charset[2].encode('raw-unicode-escape')
				875	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	876	except (LookupError, UnicodeError):
				877	charset = charset[2]
				878	# charset characters must be in us-ascii range
				879	try:
				880	charset.encode('us-ascii')
				881	except UnicodeError:
				882	return failobj
				883	# RFC 2046, $4.1.2 says charsets are not case sensitive
				884	return charset.lower()
				885
				886	def get_charsets(self, failobj=None):
				887	"""Return a list containing the charset(s) used in this message.
				888
				889	The returned list of items describes the Content-Type headers'
				890	charset parameter for this message and all the subparts in its
				891	payload.
				892
				893	Each item will either be a string (the value of the charset parameter
				894	in the Content-Type header of that part) or the value of the
				895	'failobj' parameter (defaults to None), if the part does not have a
				896	main MIME type of "text", or the charset is not defined.
				897
				898	The list will contain one string for each part of the message, plus
				899	one for the container message (i.e. self), so that a non-multipart
				900	message will still return a list of length 1.
				901	"""
				902	return [part.get_content_charset(failobj) for part in self.walk()]
				903
				904	# I.e. def walk(self): ...
				905	from email.iterators import walk