Blame - Lib/email/message.py - platform/external/python/cpython2

blob: f1ffcdb4de060278e22473d96514a82e24529a6d [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	19	from email import header
				20	from email import charset as _charset
				21	Charset = _charset.Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	22
# Separator used when joining a header value with its formatted parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				28
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	29	# How to figure out if we are processing strings that come from a byte
				30	# source with undecodable characters.
				31	_has_surrogates = re.compile(
				32	'([^\ud800-\udbff]\|\A)[\udc00-\udfff]([^\udc00-\udfff]\|\Z)').search
				33
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	34
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	35	# Helper functions
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	36	def _sanitize_header(name, value):
				37	# If the header value contains surrogates, return a Header using
				38	# the unknown-8bit charset to encode the bytes as encoded words.
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	39	if not isinstance(value, str):
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	40	# Assume it is already a header object
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	41	return value
				42	if _has_surrogates(value):
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	43	return header.Header(value, charset=_charset.UNKNOWN8BIT,
				44	header_name=name)
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	45	else:
				46	return value
				47
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	48	def _splitparam(param):
				49	# Split header parameters. BAW: this may be too simple. It isn't
				50	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	51	# found in the wild. We may eventually need a full fledged parser.
				52	# RDM: we might have a Header here; for now just stringify it.
				53	a, sep, b = str(param).partition(';')
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	54	if not sep:
				55	return a.strip(), None
				56	return a.strip(), b.strip()
				57
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	58	def _formatparam(param, value=None, quote=True):
				59	"""Convenience function to format and return a key=value pair.
				60
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	61	This will quote the value if needed or if quote is true. If value is a
				62	three tuple (charset, language, value), it will be encoded according
				63	to RFC2231 rules. If it contains non-ascii characters it will likewise
				64	be encoded according to RFC2231 rules, using the utf-8 charset and
				65	a null language.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	66	"""
				67	if value is not None and len(value) > 0:
				68	# A tuple is used for RFC 2231 encoded parameter values where items
				69	# are (charset, language, value). charset is a string, not a Charset
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	70	# instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	71	if isinstance(value, tuple):
				72	# Encode as per RFC 2231
				73	param += '*'
				74	value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	75	return '%s=%s' % (param, value)
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	76	else:
				77	try:
				78	value.encode('ascii')
				79	except UnicodeEncodeError:
				80	param += '*'
				81	value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	82	return '%s=%s' % (param, value)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	83	# BAW: Please check this. I think that if quote is set it should
				84	# force quoting even if not necessary.
				85	if quote or tspecials.search(value):
				86	return '%s="%s"' % (param, utils.quote(value))
				87	else:
				88	return '%s=%s' % (param, value)
				89	else:
				90	return param
				91
				92	def _parseparam(s):
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	93	# RDM This might be a Header, so for now stringify it.
				94	s = ';' + str(s)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	95	plist = []
				96	while s[:1] == ';':
				97	s = s[1:]
				98	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	99	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	100	end = s.find(';', end + 1)
				101	if end < 0:
				102	end = len(s)
				103	f = s[:end]
				104	if '=' in f:
				105	i = f.index('=')
				106	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				107	plist.append(f.strip())
				108	s = s[end:]
				109	return plist
				110
				111
				112	def _unquotevalue(value):
				113	# This is different than utils.collapse_rfc2231_value() because it doesn't
				114	# try to convert the value to a unicode. Message.get_param() and
				115	# Message.get_params() are both currently defined to return the tuple in
				116	# the face of RFC 2231 parameters.
				117	if isinstance(value, tuple):
				118	return value[0], value[1], utils.unquote(value[2])
				119	else:
				120	return utils.unquote(value)
				121
				122
				123
				124	class Message:
				125	"""Basic message object.
				126
				127	A message object is defined as something that has a bunch of RFC 2822
				128	headers and a payload. It may optionally have an envelope header
				129	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				130	multipart or a message/rfc822), then the payload is a list of Message
				131	objects, otherwise it is a string.
				132
				133	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	134	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	135	do in fact appear multiple times (e.g. Received) and for those headers,
				136	you must use the explicit API to set or get all the headers. Not all of
				137	the mapping methods are implemented.
				138	"""
				139	def __init__(self):
				140	self._headers = []
				141	self._unixfrom = None
				142	self._payload = None
				143	self._charset = None
				144	# Defaults for multipart messages
				145	self.preamble = self.epilogue = None
				146	self.defects = []
				147	# Default content type
				148	self._default_type = 'text/plain'
				149
				150	def __str__(self):
				151	"""Return the entire formatted message as a string.
				152	This includes the headers, body, and envelope header.
				153	"""
				154	return self.as_string()
				155
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom', when True, means include the Unix From_
        envelope header.  Optional maxheaderlen is handed to the Generator
        that does the formatting.

        This is a convenience method and may not generate the message
        exactly as you intend.  For more flexibility, use the flatten()
        method of a Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        buf = StringIO()
        Generator(buf, mangle_from_=False,
                  maxheaderlen=maxheaderlen).flatten(self, unixfrom=unixfrom)
        return buf.getvalue()
				170
				171	def is_multipart(self):
				172	"""Return True if the message consists of multiple parts."""
				173	return isinstance(self._payload, list)
				174
				175	#
				176	# Unix From_ line
				177	#
				178	def set_unixfrom(self, unixfrom):
				179	self._unixfrom = unixfrom
				180
				181	def get_unixfrom(self):
				182	return self._unixfrom
				183
				184	#
				185	# Payload manipulation.
				186	#
				187	def attach(self, payload):
				188	"""Add the given payload to the current payload.
				189
				190	The current payload will always be a list of objects after this method
				191	is called. If you want to set the payload to a scalar object, use
				192	set_payload() instead.
				193	"""
				194	if self._payload is None:
				195	self._payload = [payload]
				196	else:
				197	self._payload.append(payload)
				198
				199	def get_payload(self, i=None, decode=False):
				200	"""Return a reference to the payload.
				201
				202	The payload will either be a list object or a string. If you mutate
				203	the list object, you modify the message's payload in place. Optional
				204	i returns that index into the payload.
				205
				206	Optional decode is a flag indicating whether the payload should be
				207	decoded or not, according to the Content-Transfer-Encoding header
				208	(default is False).
				209
				210	When True and the message is not a multipart, the payload will be
				211	decoded if this header's value is `quoted-printable' or `base64'. If
				212	some other encoding is used, or the header is missing, or if the
				213	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				214	payload is returned as-is.
				215
				216	If the message is a multipart and the decode flag is True, then None
				217	is returned.
				218	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	219	# Here is the logic table for this code, based on the email5.0.0 code:
				220	# i decode is_multipart result
				221	# ------ ------ ------------ ------------------------------
				222	# None True True None
				223	# i True True None
				224	# None False True _payload (a list)
				225	# i False True _payload element i (a Message)
				226	# i False False error (not a list)
				227	# i True False error (not a list)
				228	# None False False _payload
				229	# None True False _payload decoded (bytes)
				230	# Note that Barry planned to factor out the 'decode' case, but that
				231	# isn't so easy now that we handle the 8 bit data, which needs to be
				232	# converted in both the decode and non-decode path.
				233	if self.is_multipart():
				234	if decode:
				235	return None
				236	if i is None:
				237	return self._payload
				238	else:
				239	return self._payload[i]
				240	# For backward compatibility, Use isinstance and this error message
				241	# instead of the more logical is_multipart test.
				242	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	243	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	244	payload = self._payload
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	245	# cte might be a Header, so for now stringify it.
				246	cte = str(self.get('content-transfer-encoding', '')).lower()
R David Murray	106f8e3	2011-03-15 12:48:41 -0400	[diff] [blame]	247	# payload may be bytes here.
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	248	if isinstance(payload, str):
				249	if _has_surrogates(payload):
				250	bpayload = payload.encode('ascii', 'surrogateescape')
				251	if not decode:
				252	try:
				253	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				254	except LookupError:
				255	payload = bpayload.decode('ascii', 'replace')
				256	elif decode:
				257	try:
				258	bpayload = payload.encode('ascii')
				259	except UnicodeError:
				260	# This won't happen for RFC compliant messages (messages
				261	# containing only ASCII codepoints in the unicode input).
				262	# If it does happen, turn the string into bytes in a way
				263	# guaranteed not to fail.
				264	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	265	if not decode:
				266	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	267	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	268	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	269	elif cte == 'base64':
				270	try:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	271	return base64.b64decode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	272	except binascii.Error:
				273	# Incorrect padding
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	274	return bpayload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	275	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	276	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	277	out_file = BytesIO()
				278	try:
				279	uu.decode(in_file, out_file, quiet=True)
				280	return out_file.getvalue()
				281	except uu.Error:
				282	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	283	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	284	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	285	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	286	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	287
				288	def set_payload(self, payload, charset=None):
				289	"""Set the payload to the given value.
				290
				291	Optional charset sets the message's default character set. See
				292	set_charset() for details.
				293	"""
				294	self._payload = payload
				295	if charset is not None:
				296	self.set_charset(charset)
				297
				298	def set_charset(self, charset):
				299	"""Set the charset of the payload to a given character set.
				300
				301	charset can be a Charset instance, a string naming a character set, or
				302	None. If it is a string it will be converted to a Charset instance.
				303	If charset is None, the charset parameter will be removed from the
				304	Content-Type field. Anything else will generate a TypeError.
				305
				306	The message will be assumed to be of type text/* encoded with
				307	charset.input_charset. It will be converted to charset.output_charset
				308	and encoded properly, if needed, when generating the plain text
				309	representation of the message. MIME headers (MIME-Version,
				310	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	311	"""
				312	if charset is None:
				313	self.del_param('charset')
				314	self._charset = None
				315	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	316	if not isinstance(charset, Charset):
				317	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	318	self._charset = charset
				319	if 'MIME-Version' not in self:
				320	self.add_header('MIME-Version', '1.0')
				321	if 'Content-Type' not in self:
				322	self.add_header('Content-Type', 'text/plain',
				323	charset=charset.get_output_charset())
				324	else:
				325	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	326	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	327	self._payload = charset.body_encode(self._payload)
				328	if 'Content-Transfer-Encoding' not in self:
				329	cte = charset.get_body_encoding()
				330	try:
				331	cte(self)
				332	except TypeError:
				333	self._payload = charset.body_encode(self._payload)
				334	self.add_header('Content-Transfer-Encoding', cte)
				335
				336	def get_charset(self):
				337	"""Return the Charset instance associated with the message's payload.
				338	"""
				339	return self._charset
				340
				341	#
				342	# MAPPING INTERFACE (partial)
				343	#
				344	def __len__(self):
				345	"""Return the total number of headers, including duplicates."""
				346	return len(self._headers)
				347
				348	def __getitem__(self, name):
				349	"""Get a header value.
				350
				351	Return None if the header is missing instead of raising an exception.
				352
				353	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	354	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	355	the values matching a header field name.
				356	"""
				357	return self.get(name)
				358
				359	def __setitem__(self, name, val):
				360	"""Set the value of a header.
				361
				362	Note: this does not overwrite an existing header with the same field
				363	name. Use __delitem__() first to delete any existing headers.
				364	"""
				365	self._headers.append((name, val))
				366
				367	def __delitem__(self, name):
				368	"""Delete all occurrences of a header, if present.
				369
				370	Does not raise an exception if the header is missing.
				371	"""
				372	name = name.lower()
				373	newheaders = []
				374	for k, v in self._headers:
				375	if k.lower() != name:
				376	newheaders.append((k, v))
				377	self._headers = newheaders
				378
				379	def __contains__(self, name):
				380	return name.lower() in [k.lower() for k, v in self._headers]
				381
				382	def __iter__(self):
				383	for field, value in self._headers:
				384	yield field
				385
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	386	def keys(self):
				387	"""Return a list of all the message's header field names.
				388
				389	These will be sorted in the order they appeared in the original
				390	message, or were added to the message, and may contain duplicates.
				391	Any fields deleted and re-inserted are always appended to the header
				392	list.
				393	"""
				394	return [k for k, v in self._headers]
				395
				396	def values(self):
				397	"""Return a list of all the message's header values.
				398
				399	These will be sorted in the order they appeared in the original
				400	message, or were added to the message, and may contain duplicates.
				401	Any fields deleted and re-inserted are always appended to the header
				402	list.
				403	"""
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	404	return [_sanitize_header(k, v) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	405
				406	def items(self):
				407	"""Get all the message's header fields and values.
				408
				409	These will be sorted in the order they appeared in the original
				410	message, or were added to the message, and may contain duplicates.
				411	Any fields deleted and re-inserted are always appended to the header
				412	list.
				413	"""
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	414	return [(k, _sanitize_header(k, v)) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	415
				416	def get(self, name, failobj=None):
				417	"""Get a header value.
				418
				419	Like __getitem__() but return failobj instead of None when the field
				420	is missing.
				421	"""
				422	name = name.lower()
				423	for k, v in self._headers:
				424	if k.lower() == name:
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	425	return _sanitize_header(k, v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	426	return failobj
				427
				428	#
				429	# Additional useful stuff
				430	#
				431
				432	def get_all(self, name, failobj=None):
				433	"""Return a list of all the values for the named field.
				434
				435	These will be sorted in the order they appeared in the original
				436	message, and may contain duplicates. Any fields deleted and
				437	re-inserted are always appended to the header list.
				438
				439	If no such fields exist, failobj is returned (defaults to None).
				440	"""
				441	values = []
				442	name = name.lower()
				443	for k, v in self._headers:
				444	if k.lower() == name:
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	445	values.append(_sanitize_header(k, v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	446	if not values:
				447	return failobj
				448	return values
				449
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add.  _value is the primary value of
        the header; when it is None only the parameters are written.
        Keyword arguments can be used to set additional parameters for the
        header field, with underscores converted to dashes.  Normally the
        parameter will be added as key="value" unless value is None, in
        which case only the key will be added.  If a parameter value
        contains non-ASCII characters it can be specified as a three-tuple
        of (charset, language, value), in which case it will be encoded
        according to RFC2231 rules.  Otherwise it will be encoded using the
        utf-8 charset and a language of ''.

        The header is always appended; an existing header of the same name
        is left in place.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        parts = []
        if _value is not None:
            parts.append(_value)
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            if val is None:
                # A None value means a bare attribute with no '=value'.
                parts.append(dashed)
            else:
                parts.append(_formatparam(dashed, val))
        self._headers.append((_name, SEMISPACE.join(parts)))
				479
				480	def replace_header(self, _name, _value):
				481	"""Replace a header.
				482
				483	Replace the first matching header found in the message, retaining
				484	header order and case. If no matching header was found, a KeyError is
				485	raised.
				486	"""
				487	_name = _name.lower()
				488	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				489	if k.lower() == _name:
				490	self._headers[i] = (k, _value)
				491	break
				492	else:
				493	raise KeyError(_name)
				494
				495	#
				496	# Use these three methods instead of the three above.
				497	#
				498
				499	def get_content_type(self):
				500	"""Return the message's content type.
				501
				502	The returned string is coerced to lower case of the form
				503	`maintype/subtype'. If there was no Content-Type header in the
				504	message, the default type as given by get_default_type() will be
				505	returned. Since according to RFC 2045, messages always have a default
				506	type this will always return a value.
				507
				508	RFC 2045 defines a message's default type to be text/plain unless it
				509	appears inside a multipart/digest container, in which case it would be
				510	message/rfc822.
				511	"""
				512	missing = object()
				513	value = self.get('content-type', missing)
				514	if value is missing:
				515	# This should have no parameters
				516	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	517	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	518	# RFC 2045, section 5.2 says if its invalid, use text/plain
				519	if ctype.count('/') != 1:
				520	return 'text/plain'
				521	return ctype
				522
				523	def get_content_maintype(self):
				524	"""Return the message's main content type.
				525
				526	This is the `maintype' part of the string returned by
				527	get_content_type().
				528	"""
				529	ctype = self.get_content_type()
				530	return ctype.split('/')[0]
				531
				532	def get_content_subtype(self):
				533	"""Returns the message's sub-content type.
				534
				535	This is the `subtype' part of the string returned by
				536	get_content_type().
				537	"""
				538	ctype = self.get_content_type()
				539	return ctype.split('/')[1]
				540
				541	def get_default_type(self):
				542	"""Return the `default' content type.
				543
				544	Most messages have a default content type of text/plain, except for
				545	messages that are subparts of multipart/digest containers. Such
				546	subparts have a default content type of message/rfc822.
				547	"""
				548	return self._default_type
				549
				550	def set_default_type(self, ctype):
				551	"""Set the `default' content type.
				552
				553	ctype should be either "text/plain" or "message/rfc822", although this
				554	is not enforced. The default content type is not stored in the
				555	Content-Type header.
				556	"""
				557	self._default_type = ctype
				558
				559	def _get_params_preserve(self, failobj, header):
				560	# Like get_params() but preserves the quoting of values. BAW:
				561	# should this be part of the public interface?
				562	missing = object()
				563	value = self.get(header, missing)
				564	if value is missing:
				565	return failobj
				566	params = []
R David Murray	a215023	2011-03-16 21:11:23 -0400	[diff] [blame]	567	for p in _parseparam(value):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	568	try:
				569	name, val = p.split('=', 1)
				570	name = name.strip()
				571	val = val.strip()
				572	except ValueError:
				573	# Must have been a bare attribute
				574	name = p.strip()
				575	val = ''
				576	params.append((name, val))
				577	params = utils.decode_params(params)
				578	return params
				579
				580	def get_params(self, failobj=None, header='content-type', unquote=True):
				581	"""Return the message's Content-Type parameters, as a list.
				582
				583	The elements of the returned list are 2-tuples of key/value pairs, as
				584	split on the `=' sign. The left hand side of the `=' is the key,
				585	while the right hand side is the value. If there is no `=' sign in
				586	the parameter the value is the empty string. The value is as
				587	described in the get_param() method.
				588
				589	Optional failobj is the object to return if there is no Content-Type
				590	header. Optional header is the header to search instead of
				591	Content-Type. If unquote is True, the value is unquoted.
				592	"""
				593	missing = object()
				594	params = self._get_params_preserve(missing, header)
				595	if params is missing:
				596	return failobj
				597	if unquote:
				598	return [(k, _unquotevalue(v)) for k, v in params]
				599	else:
				600	return params
				601
				602	def get_param(self, param, failobj=None, header='content-type',
				603	unquote=True):
				604	"""Return the parameter value if found in the Content-Type header.
				605
				606	Optional failobj is the object to return if there is no Content-Type
				607	header, or the Content-Type header has no such parameter. Optional
				608	header is the header to search instead of Content-Type.
				609
				610	Parameter keys are always compared case insensitively. The return
				611	value can either be a string, or a 3-tuple if the parameter was RFC
				612	2231 encoded. When it's a 3-tuple, the elements of the value are of
				613	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				614	LANGUAGE can be None, in which case you should consider VALUE to be
				615	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame^]	616	The parameter value (either the returned string, or the VALUE item in
				617	the 3-tuple) is always unquoted, unless unquote is set to False.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	618
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame^]	619	If your application doesn't care whether the parameter was RFC 2231
				620	encoded, it can turn the return value into a string as follows:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	621
				622	param = msg.get_param('foo')
R David Murray	3ac8c78	2012-06-17 15:26:35 -0400	[diff] [blame^]	623	param = email.utils.collapse_rfc2231_value(rawparam)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	624
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	625	"""
				626	if header not in self:
				627	return failobj
				628	for k, v in self._get_params_preserve(failobj, header):
				629	if k.lower() == param.lower():
				630	if unquote:
				631	return _unquotevalue(v)
				632	else:
				633	return v
				634	return failobj
				635
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.  This holds even when the existing
        value is empty (e.g. a bare attribute).

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should be
        strings.

        When the resulting header text differs from the current one, the
        old header is deleted and the new one is appended.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # Use a sentinel rather than truthiness so that a parameter which is
        # present with an empty value is replaced instead of duplicated.
        missing = object()
        if self.get_param(param, missing, header=header) is missing:
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header from its parameters, swapping in the new
            # value for the one being set.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
				683
    def del_param(self, param, header='content-type', requote=True):
        """Strip one parameter (and its value) out of a header.

        The header, Content-Type unless another name is given via the header
        argument, is rebuilt from its remaining parameters.  Those values
        are re-quoted as needed unless requote is False.  Nothing happens
        when the header is absent from the message.
        """
        if header not in self:
            return
        rebuilt = ''
        for name, val in self.get_params(header=header, unquote=requote):
            if name.lower() == param.lower():
                # This is the parameter being removed; leave it out.
                continue
            piece = _formatparam(name, val, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
        # Leave the header alone when the rebuilt text is unchanged.
        if rebuilt != self.get(header):
            del self[header]
            self[header] = rebuilt
				705
    def set_type(self, type, header='Content-Type', requote=True):
        """Replace the "maintype/subtype" portion of the Content-Type header.

        type has to contain exactly one '/' character; anything else raises
        ValueError.

        Any parameters already on the header are carried over to the new
        value.  With requote true (the default) they are re-quoted; with
        requote false their existing quoting is left untouched.

        A header other than Content-Type may be named through the header
        argument.  Whenever Content-Type itself is being set, a MIME-Version
        header of "1.0" is (re)written as well.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError
        # Setting the Content-Type implies a MIME-Version header.
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header in self:
            old_params = self.get_params(header=header, unquote=requote)
            del self[header]
            self[header] = type
            # The first entry is the previous type itself, not a parameter.
            for name, val in old_params[1:]:
                self.set_param(name, val, header, requote)
        else:
            self[header] = type
				737
    def get_filename(self, failobj=None):
        """Return the payload's filename, or failobj when none is given.

        The `filename' parameter of the Content-Disposition header is tried
        first; if it is absent, the `name' parameter of the Content-Type
        header is used instead.  The value found is unquoted and stripped of
        surrounding whitespace.
        """
        missing = object()
        lookups = (('filename', 'content-disposition'),
                   ('name', 'content-type'))
        for param, header in lookups:
            found = self.get_param(param, missing, header)
            if found is not missing:
                return utils.collapse_rfc2231_value(found).strip()
        return failobj
				753
    def get_boundary(self, failobj=None):
        """Return the payload's boundary string, or failobj if there is none.

        The value comes from the `boundary' parameter of the Content-Type
        header and is returned unquoted, with trailing whitespace removed.
        """
        missing = object()
        found = self.get_param('boundary', missing)
        if found is not missing:
            # RFC 2046 says that boundaries may begin but not end in w/s
            return utils.collapse_rfc2231_value(found).rstrip()
        return failobj
				766
    def set_boundary(self, boundary):
        """Set the Content-Type header's boundary parameter to 'boundary'.

        Unlike deleting the Content-Type header and re-adding it with a new
        boundary via add_header(), this rewrites the header where it stands,
        so the order of the headers in the original message is preserved.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        had_boundary = False
        updated = []
        for name, val in params:
            if name.lower() == 'boundary':
                had_boundary = True
                updated.append(('boundary', quoted))
            else:
                updated.append((name, val))
        if not had_boundary:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            updated.append(('boundary', quoted))
        # Render the parameter list once; valueless entries are emitted bare.
        rendered = SEMISPACE.join(
            [name if val == '' else '%s=%s' % (name, val)
             for name, val in updated])
        # Replace the existing Content-Type header with the new value
        self._headers = [
            (hname, rendered) if hname.lower() == 'content-type'
            else (hname, hvalue)
            for hname, hvalue in self._headers]
				811
    def get_content_charset(self, failobj=None):
        """Return the Content-Type header's charset parameter, lower-cased.

        failobj is returned when the message has no Content-Type header,
        when that header carries no charset parameter, or when the charset
        name contains characters outside the us-ascii range.
        """
        missing = object()
        value = self.get_param('charset', missing)
        if value is missing:
            return failobj
        if isinstance(value, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            declared = value[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                decoded = str(value[2].encode('raw-unicode-escape'), declared)
            except (LookupError, UnicodeError):
                decoded = value[2]
            value = decoded
        # charset characters must be in us-ascii range
        try:
            value.encode('us-ascii')
        except UnicodeError:
            return failobj
        else:
            # RFC 2046, $4.1.2 says charsets are not case sensitive
            return value.lower()
				841
    def get_charsets(self, failobj=None):
        """Return the charset of every part of this message as a list.

        Each part yielded by walk() contributes one entry: the result of
        calling its get_content_charset() with failobj as the fallback
        (failobj defaults to None).

        walk() is expected to yield the container message (i.e. self) as
        well as its subparts, so a non-multipart message still produces a
        list of length 1.
        """
        charsets = []
        for part in self.walk():
            charsets.append(part.get_content_charset(failobj))
        return charsets
				859
				860	# I.e. def walk(self): ...
				861	from email.iterators import walk