Blame - Lib/email/message.py - platform/external/python/cpython2

blob: 62b82b79c100fa91100024ab34966d0087eaf5ad [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	13	from io import BytesIO, StringIO
				14
				15	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	16	from email import utils
				17	from email import errors
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	18	from email._policybase import compat32
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	19	from email import charset as _charset
R David Murray	80e0aee	2012-05-27 21:23:34 -0400	[diff] [blame]	20	from email._encoded_words import decode_b
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	21	Charset = _charset.Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	22
				23	SEMISPACE = '; '
				24
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	25	# Regular expression that matches `special' characters in parameters, the
Mark Dickinson	934896d	2009-02-21 20:59:32 +0000	[diff] [blame]	26	# existence of which force quoting of the parameter value.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	27	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				28
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	29
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	30	def _splitparam(param):
				31	# Split header parameters. BAW: this may be too simple. It isn't
				32	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	33	# found in the wild. We may eventually need a full fledged parser.
				34	# RDM: we might have a Header here; for now just stringify it.
				35	a, sep, b = str(param).partition(';')
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	36	if not sep:
				37	return a.strip(), None
				38	return a.strip(), b.strip()
				39
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	40	def _formatparam(param, value=None, quote=True):
				41	"""Convenience function to format and return a key=value pair.
				42
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	43	This will quote the value if needed or if quote is true. If value is a
				44	three tuple (charset, language, value), it will be encoded according
				45	to RFC2231 rules. If it contains non-ascii characters it will likewise
				46	be encoded according to RFC2231 rules, using the utf-8 charset and
				47	a null language.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	48	"""
				49	if value is not None and len(value) > 0:
				50	# A tuple is used for RFC 2231 encoded parameter values where items
				51	# are (charset, language, value). charset is a string, not a Charset
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	52	# instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	53	if isinstance(value, tuple):
				54	# Encode as per RFC 2231
				55	param += '*'
				56	value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	57	return '%s=%s' % (param, value)
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	58	else:
				59	try:
				60	value.encode('ascii')
				61	except UnicodeEncodeError:
				62	param += '*'
				63	value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	64	return '%s=%s' % (param, value)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	65	# BAW: Please check this. I think that if quote is set it should
				66	# force quoting even if not necessary.
				67	if quote or tspecials.search(value):
				68	return '%s="%s"' % (param, utils.quote(value))
				69	else:
				70	return '%s=%s' % (param, value)
				71	else:
				72	return param
				73
				74	def _parseparam(s):
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	75	# RDM This might be a Header, so for now stringify it.
				76	s = ';' + str(s)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	77	plist = []
				78	while s[:1] == ';':
				79	s = s[1:]
				80	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	81	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	82	end = s.find(';', end + 1)
				83	if end < 0:
				84	end = len(s)
				85	f = s[:end]
				86	if '=' in f:
				87	i = f.index('=')
				88	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				89	plist.append(f.strip())
				90	s = s[end:]
				91	return plist
				92
				93
				94	def _unquotevalue(value):
				95	# This is different than utils.collapse_rfc2231_value() because it doesn't
				96	# try to convert the value to a unicode. Message.get_param() and
				97	# Message.get_params() are both currently defined to return the tuple in
				98	# the face of RFC 2231 parameters.
				99	if isinstance(value, tuple):
				100	return value[0], value[1], utils.unquote(value[2])
				101	else:
				102	return utils.unquote(value)
				103
				104
				105
				106	class Message:
				107	"""Basic message object.
				108
				109	A message object is defined as something that has a bunch of RFC 2822
				110	headers and a payload. It may optionally have an envelope header
				111	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				112	multipart or a message/rfc822), then the payload is a list of Message
				113	objects, otherwise it is a string.
				114
				115	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	116	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	117	do in fact appear multiple times (e.g. Received) and for those headers,
				118	you must use the explicit API to set or get all the headers. Not all of
				119	the mapping methods are implemented.
				120	"""
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	121	def __init__(self, policy=compat32):
				122	self.policy = policy
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	123	self._headers = []
				124	self._unixfrom = None
				125	self._payload = None
				126	self._charset = None
				127	# Defaults for multipart messages
				128	self.preamble = self.epilogue = None
				129	self.defects = []
				130	# Default content type
				131	self._default_type = 'text/plain'
				132
    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and envelope header.  It is
        exactly as_string() called with its default arguments.
        """
        return self.as_string()
				138
				139	def as_string(self, unixfrom=False, maxheaderlen=0):
				140	"""Return the entire formatted message as a string.
				141	Optional `unixfrom' when True, means include the Unix From_ envelope
				142	header.
				143
				144	This is a convenience method and may not generate the message exactly
R David Murray	7dedcb4	2011-03-15 14:01:18 -0400	[diff] [blame]	145	as you intend. For more flexibility, use the flatten() method of a
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	146	Generator instance.
				147	"""
				148	from email.generator import Generator
				149	fp = StringIO()
				150	g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
				151	g.flatten(self, unixfrom=unixfrom)
				152	return fp.getvalue()
				153
    def is_multipart(self):
        """Return True if the message consists of multiple parts.

        The test is simply whether the payload is currently a list.
        """
        return isinstance(self._payload, list)
				157
				158	#
				159	# Unix From_ line
				160	#
    def set_unixfrom(self, unixfrom):
        """Store `unixfrom' as the message's Unix From_ envelope line."""
        self._unixfrom = unixfrom
				163
    def get_unixfrom(self):
        """Return the stored Unix From_ envelope line (None until set)."""
        return self._unixfrom
				166
				167	#
				168	# Payload manipulation.
				169	#
				170	def attach(self, payload):
				171	"""Add the given payload to the current payload.
				172
				173	The current payload will always be a list of objects after this method
				174	is called. If you want to set the payload to a scalar object, use
				175	set_payload() instead.
				176	"""
				177	if self._payload is None:
				178	self._payload = [payload]
				179	else:
				180	self._payload.append(payload)
				181
				182	def get_payload(self, i=None, decode=False):
				183	"""Return a reference to the payload.
				184
				185	The payload will either be a list object or a string. If you mutate
				186	the list object, you modify the message's payload in place. Optional
				187	i returns that index into the payload.
				188
				189	Optional decode is a flag indicating whether the payload should be
				190	decoded or not, according to the Content-Transfer-Encoding header
				191	(default is False).
				192
				193	When True and the message is not a multipart, the payload will be
				194	decoded if this header's value is `quoted-printable' or `base64'. If
				195	some other encoding is used, or the header is missing, or if the
				196	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				197	payload is returned as-is.
				198
				199	If the message is a multipart and the decode flag is True, then None
				200	is returned.
				201	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	202	# Here is the logic table for this code, based on the email5.0.0 code:
				203	# i decode is_multipart result
				204	# ------ ------ ------------ ------------------------------
				205	# None True True None
				206	# i True True None
				207	# None False True _payload (a list)
				208	# i False True _payload element i (a Message)
				209	# i False False error (not a list)
				210	# i True False error (not a list)
				211	# None False False _payload
				212	# None True False _payload decoded (bytes)
				213	# Note that Barry planned to factor out the 'decode' case, but that
				214	# isn't so easy now that we handle the 8 bit data, which needs to be
				215	# converted in both the decode and non-decode path.
				216	if self.is_multipart():
				217	if decode:
				218	return None
				219	if i is None:
				220	return self._payload
				221	else:
				222	return self._payload[i]
				223	# For backward compatibility, Use isinstance and this error message
				224	# instead of the more logical is_multipart test.
				225	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	226	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	227	payload = self._payload
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	228	# cte might be a Header, so for now stringify it.
				229	cte = str(self.get('content-transfer-encoding', '')).lower()
R David Murray	106f8e3	2011-03-15 12:48:41 -0400	[diff] [blame]	230	# payload may be bytes here.
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	231	if isinstance(payload, str):
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	232	if utils._has_surrogates(payload):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	233	bpayload = payload.encode('ascii', 'surrogateescape')
				234	if not decode:
				235	try:
				236	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				237	except LookupError:
				238	payload = bpayload.decode('ascii', 'replace')
				239	elif decode:
				240	try:
				241	bpayload = payload.encode('ascii')
				242	except UnicodeError:
				243	# This won't happen for RFC compliant messages (messages
				244	# containing only ASCII codepoints in the unicode input).
				245	# If it does happen, turn the string into bytes in a way
				246	# guaranteed not to fail.
				247	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	248	if not decode:
				249	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	250	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	251	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	252	elif cte == 'base64':
R David Murray	80e0aee	2012-05-27 21:23:34 -0400	[diff] [blame]	253	# XXX: this is a bit of a hack; decode_b should probably be factored
				254	# out somewhere, but I haven't figured out where yet.
				255	value, defects = decode_b(b''.join(bpayload.splitlines()))
				256	for defect in defects:
				257	self.policy.handle_defect(self, defect)
				258	return value
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	259	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	260	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	261	out_file = BytesIO()
				262	try:
				263	uu.decode(in_file, out_file, quiet=True)
				264	return out_file.getvalue()
				265	except uu.Error:
				266	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	267	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	268	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	269	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	270	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	271
    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        The payload is stored as given, replacing any previous payload.
        Optional charset sets the message's default character set; when it
        is not None it is passed to set_charset() after the payload has
        been stored.  See set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)
				281
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            # Forget the charset entirely: header parameter and attribute.
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            # Keep the existing type; only (re)write its charset parameter.
            self.set_param('charset', charset.get_output_charset())
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        # An existing Content-Transfer-Encoding header is left alone.
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # NOTE(review): presumably cte is either a callable that
                # encodes this message in place or a plain encoding name;
                # calling a non-callable raises the TypeError handled
                # below -- confirm against Charset.get_body_encoding().
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)
				319
    def get_charset(self):
        """Return the Charset instance associated with the message's payload.

        This is the value last stored by set_charset(), or None if no
        charset has been set.
        """
        return self._charset
				324
				325	#
				326	# MAPPING INTERFACE (partial)
				327	#
    def __len__(self):
        """Return the total number of headers, including duplicates.

        Every stored (name, value) pair counts once.
        """
        return len(self._headers)
				331
    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.
        This is get() called with its default failobj.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)
				342
    def __setitem__(self, name, val):
        """Set the value of a header.

        The pair is passed through the policy's header_store_parse() and
        the result is appended to the header list.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append(self.policy.header_store_parse(name, val))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	350
				351	def __delitem__(self, name):
				352	"""Delete all occurrences of a header, if present.
				353
				354	Does not raise an exception if the header is missing.
				355	"""
				356	name = name.lower()
				357	newheaders = []
				358	for k, v in self._headers:
				359	if k.lower() != name:
				360	newheaders.append((k, v))
				361	self._headers = newheaders
				362
				363	def __contains__(self, name):
				364	return name.lower() in [k.lower() for k, v in self._headers]
				365
    def __iter__(self):
        # Iterating a message yields its header field names, in stored
        # order and including duplicates; the values are skipped.
        for field, value in self._headers:
            yield field
				369
    def keys(self):
        """Return a list of all the message's header field names.

        These will be in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.  A new list is built on every call.
        """
        return [k for k, v in self._headers]
				379
    def values(self):
        """Return a list of all the message's header values.

        These will be in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.  Each stored value is passed through the policy's
        header_fetch_parse() before being returned.
        """
        return [self.policy.header_fetch_parse(k, v)
                for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	390
    def items(self):
        """Get all the message's header fields and values.

        Returns a list of (name, value) tuples.  These will be in the order
        they appeared in the original message, or were added to the message,
        and may contain duplicates.  Any fields deleted and re-inserted are
        always appended to the header list.  Each stored value is passed
        through the policy's header_fetch_parse() before being returned.
        """
        return [(k, self.policy.header_fetch_parse(k, v))
                for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	401
				402	def get(self, name, failobj=None):
				403	"""Get a header value.
				404
				405	Like __getitem__() but return failobj instead of None when the field
				406	is missing.
				407	"""
				408	name = name.lower()
				409	for k, v in self._headers:
				410	if k.lower() == name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	411	return self.policy.header_fetch_parse(k, v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	412	return failobj
				413
				414	#
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	415	# "Internal" methods (public API, but only intended for use by a parser
				416	# or generator, not normal application code.
				417	#
				418
    def set_raw(self, name, value):
        """Store name and value in the model without modification.

        The pair is appended to the header list directly; unlike
        __setitem__() it is not passed through the policy.

        This is an "internal" API, intended only for use by a parser.
        """
        self._headers.append((name, value))
				425
    def raw_items(self):
        """Return the (name, value) header pairs without modification.

        An iterator over a copy of the header list is returned, so the
        pairs seen are those present at the time of the call.

        This is an "internal" API, intended only for use by a generator.
        """
        return iter(self._headers.copy())
				432
				433	#
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	434	# Additional useful stuff
				435	#
				436
				437	def get_all(self, name, failobj=None):
				438	"""Return a list of all the values for the named field.
				439
				440	These will be sorted in the order they appeared in the original
				441	message, and may contain duplicates. Any fields deleted and
				442	re-inserted are always appended to the header list.
				443
				444	If no such fields exist, failobj is returned (defaults to None).
				445	"""
				446	values = []
				447	name = name.lower()
				448	for k, v in self._headers:
				449	if k.lower() == name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	450	values.append(self.policy.header_fetch_parse(k, v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	451	if not values:
				452	return failobj
				453	return values
				454
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add.  Keyword arguments can be used to
        set additional parameters for the header field, with underscores
        converted to dashes.  Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added.  If a parameter value contains non-ASCII characters it can be
        specified as a three-tuple of (charset, language, value), in which
        case it will be encoded according to RFC2231 rules.  Otherwise it
        will be encoded using the utf-8 charset and a language of ''.

        The header is appended; an existing header of the same name is not
        replaced.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        pieces = []
        if _value is not None:
            pieces.append(_value)
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            pieces.append(dashed if val is None else _formatparam(dashed, val))
        self[_name] = SEMISPACE.join(pieces)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	484
				485	def replace_header(self, _name, _value):
				486	"""Replace a header.
				487
				488	Replace the first matching header found in the message, retaining
				489	header order and case. If no matching header was found, a KeyError is
				490	raised.
				491	"""
				492	_name = _name.lower()
				493	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				494	if k.lower() == _name:
R David Murray	c27e522	2012-05-25 15:01:48 -0400	[diff] [blame]	495	self._headers[i] = self.policy.header_store_parse(k, _value)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	496	break
				497	else:
				498	raise KeyError(_name)
				499
				500	#
				501	# Use these three methods instead of the three above.
				502	#
				503
				504	def get_content_type(self):
				505	"""Return the message's content type.
				506
				507	The returned string is coerced to lower case of the form
				508	`maintype/subtype'. If there was no Content-Type header in the
				509	message, the default type as given by get_default_type() will be
				510	returned. Since according to RFC 2045, messages always have a default
				511	type this will always return a value.
				512
				513	RFC 2045 defines a message's default type to be text/plain unless it
				514	appears inside a multipart/digest container, in which case it would be
				515	message/rfc822.
				516	"""
				517	missing = object()
				518	value = self.get('content-type', missing)
				519	if value is missing:
				520	# This should have no parameters
				521	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	522	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	523	# RFC 2045, section 5.2 says if its invalid, use text/plain
				524	if ctype.count('/') != 1:
				525	return 'text/plain'
				526	return ctype
				527
    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type(), i.e. everything before the first '/'.
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]
				536
    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type(), i.e. the second '/'-separated field.
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]
				545
    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.

        The value returned is whatever was last stored by
        set_default_type(), or 'text/plain' for a freshly created message.
        """
        return self._default_type
				554
    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header; it is only kept on the message object.
        """
        self._default_type = ctype
				563
				564	def _get_params_preserve(self, failobj, header):
				565	# Like get_params() but preserves the quoting of values. BAW:
				566	# should this be part of the public interface?
				567	missing = object()
				568	value = self.get(header, missing)
				569	if value is missing:
				570	return failobj
				571	params = []
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	572	for p in _parseparam(value):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	573	try:
				574	name, val = p.split('=', 1)
				575	name = name.strip()
				576	val = val.strip()
				577	except ValueError:
				578	# Must have been a bare attribute
				579	name = p.strip()
				580	val = ''
				581	params.append((name, val))
				582	params = utils.decode_params(params)
				583	return params
				584
				585	def get_params(self, failobj=None, header='content-type', unquote=True):
				586	"""Return the message's Content-Type parameters, as a list.
				587
				588	The elements of the returned list are 2-tuples of key/value pairs, as
				589	split on the `=' sign. The left hand side of the `=' is the key,
				590	while the right hand side is the value. If there is no `=' sign in
				591	the parameter the value is the empty string. The value is as
				592	described in the get_param() method.
				593
				594	Optional failobj is the object to return if there is no Content-Type
				595	header. Optional header is the header to search instead of
				596	Content-Type. If unquote is True, the value is unquoted.
				597	"""
				598	missing = object()
				599	params = self._get_params_preserve(missing, header)
				600	if params is missing:
				601	return failobj
				602	if unquote:
				603	return [(k, _unquotevalue(v)) for k, v in params]
				604	else:
				605	return params
				606
				607	def get_param(self, param, failobj=None, header='content-type',
				608	unquote=True):
				609	"""Return the parameter value if found in the Content-Type header.
				610
				611	Optional failobj is the object to return if there is no Content-Type
				612	header, or the Content-Type header has no such parameter. Optional
				613	header is the header to search instead of Content-Type.
				614
				615	Parameter keys are always compared case insensitively. The return
				616	value can either be a string, or a 3-tuple if the parameter was RFC
				617	2231 encoded. When it's a 3-tuple, the elements of the value are of
				618	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				619	LANGUAGE can be None, in which case you should consider VALUE to be
				620	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				621
				622	Your application should be prepared to deal with 3-tuple return
				623	values, and can convert the parameter to a Unicode string like so:
				624
				625	param = msg.get_param('foo')
				626	if isinstance(param, tuple):
				627	param = unicode(param[2], param[0] or 'us-ascii')
				628
				629	In any case, the parameter value (either the returned string, or the
				630	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				631	to False.
				632	"""
				633	if header not in self:
				634	return failobj
				635	for k, v in self._get_params_preserve(failobj, header):
				636	if k.lower() == param.lower():
				637	if unquote:
				638	return _unquotevalue(v)
				639	else:
				640	return v
				641	return failobj
				642
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should be
        strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)

        if self.get_param(param, header=header):
            # The parameter is already there: rebuild the whole header,
            # swapping in the new value where the names match.
            wanted = param.lower()
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == wanted:
                    piece = _formatparam(param, value, requote)
                else:
                    piece = _formatparam(old_param, old_value, requote)
                ctype = SEMISPACE.join([ctype, piece]) if ctype else piece
        elif ctype:
            ctype = SEMISPACE.join(
                [ctype, _formatparam(param, value, requote)])
        else:
            ctype = _formatparam(param, value, requote)

        # Only rewrite the header when its text actually changed.
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
				690
				def del_param(self, param, header='content-type', requote=True):
				    """Remove the given parameter completely from the Content-Type header.

				    The header is rewritten in place without the parameter or its value.
				    Every remaining value is quoted as necessary unless requote is False.
				    Optional header names an alternative to the Content-Type header.
				    Nothing happens when the header is not present.
				    """
				    if header not in self:
				        return
				    unwanted = param.lower()
				    rebuilt = ''
				    for name, val in self.get_params(header=header, unquote=requote):
				        if name.lower() == unwanted:
				            # This is the parameter being removed; leave it out.
				            continue
				        formatted = _formatparam(name, val, requote)
				        rebuilt = SEMISPACE.join([rebuilt, formatted]) if rebuilt else formatted
				    # Only rewrite the header when its rendered value changed.
				    if rebuilt != self.get(header):
				        del self[header]
				        self[header] = rebuilt
				712
				def set_type(self, type, header='Content-Type', requote=True):
				    """Set the main type and subtype for the Content-Type header.

				    type must be a string in the form "maintype/subtype", otherwise a
				    ValueError (with a message naming the offending value) is raised.

				    This method replaces the Content-Type header, keeping all the
				    parameters in place.  If requote is False, this leaves the existing
				    header's quoting as is.  Otherwise, the parameters will be quoted (the
				    default).

				    An alternative header can be specified in the header argument.  When
				    the Content-Type header is set, we'll always also add a MIME-Version
				    header.
				    """
				    # BAW: should we be strict?
				    if type.count('/') != 1:
				        raise ValueError(
				            'type must be of the form "maintype/subtype", got %r' % (type,))
				    # Set the Content-Type, you get a MIME-Version
				    if header.lower() == 'content-type':
				        del self['mime-version']
				        self['MIME-Version'] = '1.0'
				    if header not in self:
				        self[header] = type
				        return
				    # Capture the existing parameters before the header is replaced.
				    params = self.get_params(header=header, unquote=requote)
				    del self[header]
				    self[header] = type
				    # Skip the first param; it's the old type.
				    for p, v in params[1:]:
				        self.set_param(p, v, header, requote)
				744
				def get_filename(self, failobj=None):
				    """Return the filename associated with the payload if present.

				    The Content-Disposition header's `filename' parameter is consulted
				    first; when it is absent the Content-Type header's `name' parameter
				    is used instead.  The value found is collapsed per RFC 2231 and
				    stripped of surrounding whitespace.  failobj is returned when
				    neither parameter exists.
				    """
				    sentinel = object()
				    lookups = (
				        ('filename', 'content-disposition'),
				        ('name', 'content-type'),
				    )
				    for param_name, header_name in lookups:
				        found = self.get_param(param_name, sentinel, header_name)
				        if found is not sentinel:
				            return utils.collapse_rfc2231_value(found).strip()
				    return failobj
				760
				def get_boundary(self, failobj=None):
				    """Return the boundary associated with the payload if present.

				    The value comes from the `boundary' parameter of the Content-Type
				    header, collapsed per RFC 2231 with trailing whitespace removed.
				    failobj is returned when there is no such parameter.
				    """
				    sentinel = object()
				    raw = self.get_param('boundary', sentinel)
				    if raw is not sentinel:
				        # RFC 2046 says that boundaries may begin but not end in w/s
				        return utils.collapse_rfc2231_value(raw).rstrip()
				    return failobj
				773
				def set_boundary(self, boundary):
				    """Set the boundary parameter in Content-Type to 'boundary'.

				    Unlike deleting the Content-Type header and adding a fresh one via
				    add_header(), this keeps the Content-Type header at its original
				    position among the message headers and keeps its other parameters
				    in order.

				    HeaderParseError is raised if the message has no Content-Type header.
				    """
				    sentinel = object()
				    params = self._get_params_preserve(sentinel, 'content-type')
				    if params is sentinel:
				        # There was no Content-Type header, and we don't know what type
				        # to set it to, so raise an exception.
				        raise errors.HeaderParseError('No Content-Type header found')
				    boundary_item = ('boundary', '"%s"' % boundary)
				    replaced = False
				    updated = []
				    for name, value in params:
				        if name.lower() == 'boundary':
				            updated.append(boundary_item)
				            replaced = True
				        else:
				            updated.append((name, value))
				    if not replaced:
				        # The original Content-Type header had no boundary attribute.
				        # Tack one on the end.  BAW: should we raise an exception
				        # instead???
				        updated.append(boundary_item)
				    # Render the new header value; a parameter with an empty value is
				    # emitted as the bare name.
				    rendered = SEMISPACE.join(
				        name if value == '' else '%s=%s' % (name, value)
				        for name, value in updated)
				    # Replace every Content-Type header in place, leaving the others as is.
				    self._headers = [
				        self.policy.header_store_parse(key, rendered)
				        if key.lower() == 'content-type' else (key, old_value)
				        for key, old_value in self._headers
				    ]
				819
				def get_content_charset(self, failobj=None):
				    """Return the charset parameter of the Content-Type header.

				    The returned string is always coerced to lower case.  failobj is
				    returned when there is no charset parameter to be found, or when the
				    charset name contains characters outside the us-ascii range.
				    """
				    sentinel = object()
				    value = self.get_param('charset', sentinel)
				    if value is sentinel:
				        return failobj
				    if isinstance(value, tuple):
				        # RFC 2231 encoded, so decode it, and it better end up as ascii.
				        codec = value[0] or 'us-ascii'
				        try:
				            # LookupError will be raised if the charset isn't known to
				            # Python.  UnicodeError will be raised if the encoded text
				            # contains a character not in the charset.
				            value = value[2].encode('raw-unicode-escape').decode(codec)
				        except (LookupError, UnicodeError):
				            # Fall back to the undecoded text of the 3-tuple.
				            value = value[2]
				    # charset characters must be in us-ascii range
				    try:
				        value.encode('us-ascii')
				    except UnicodeError:
				        return failobj
				    # RFC 2046, $4.1.2 says charsets are not case sensitive
				    return value.lower()
				849
				def get_charsets(self, failobj=None):
				    """Return a list containing the charset(s) used in this message.

				    The list holds one entry for every part produced by self.walk(), in
				    walk order: the result of that part's get_content_charset(failobj).
				    Each entry is therefore either the part's charset string or failobj
				    (None by default) when that part yields no charset.

				    Since the walk includes the container message (i.e. self), a
				    non-multipart message still returns a list of length 1.
				    """
				    charsets = []
				    for part in self.walk():
				        charsets.append(part.get_content_charset(failobj))
				    return charsets
				867
				868	# I.e. def walk(self): ...
				869	from email.iterators import walk