Blame - Lib/email/message.py - platform/external/python/cpython3

blob: a835ce2e69bd2749fa3550f77f2fef408b4d63df [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
				21	SEMISPACE = '; '
				22
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	23	# Regular expression that matches `special' characters in parameters, the
Mark Dickinson	934896d	2009-02-21 20:59:32 +0000	[diff] [blame]	24	# existence of which force quoting of the parameter value.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	25	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				26
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	27	# How to figure out if we are processing strings that come from a byte
				28	# source with undecodable characters.
				29	_has_surrogates = re.compile(
				30	'([^\ud800-\udbff]\|\A)[\udc00-\udfff]([^\udc00-\udfff]\|\Z)').search
				31
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	32
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	33	# Helper functions
def _sanitize_surrogates(value):
    # Used to clean header values before they escape as strings.  When a
    # str value contains surrogates, turn it back into bytes via the
    # 'surrogateescape' handler and re-decode as ASCII, so that every
    # non-ASCII byte ends up as a '?'.
    if isinstance(value, str) and _has_surrogates(value):
        raw = value.encode('ascii', 'surrogateescape')
        return raw.decode('ascii', 'replace').replace('\ufffd', '?')
    # Either a non-str (Header object) or a str with nothing to clean.
    return value
				46
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	47	def _splitparam(param):
				48	# Split header parameters. BAW: this may be too simple. It isn't
				49	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				50	# found in the wild. We may eventually need a full fledged parser
				51	# eventually.
				52	a, sep, b = param.partition(';')
				53	if not sep:
				54	return a.strip(), None
				55	return a.strip(), b.strip()
				56
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	57	def _formatparam(param, value=None, quote=True):
				58	"""Convenience function to format and return a key=value pair.
				59
				60	This will quote the value if needed or if quote is true.
				61	"""
				62	if value is not None and len(value) > 0:
				63	# A tuple is used for RFC 2231 encoded parameter values where items
				64	# are (charset, language, value). charset is a string, not a Charset
				65	# instance.
				66	if isinstance(value, tuple):
				67	# Encode as per RFC 2231
				68	param += '*'
				69	value = utils.encode_rfc2231(value[2], value[0], value[1])
				70	# BAW: Please check this. I think that if quote is set it should
				71	# force quoting even if not necessary.
				72	if quote or tspecials.search(value):
				73	return '%s="%s"' % (param, utils.quote(value))
				74	else:
				75	return '%s=%s' % (param, value)
				76	else:
				77	return param
				78
				79	def _parseparam(s):
				80	plist = []
				81	while s[:1] == ';':
				82	s = s[1:]
				83	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	84	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	85	end = s.find(';', end + 1)
				86	if end < 0:
				87	end = len(s)
				88	f = s[:end]
				89	if '=' in f:
				90	i = f.index('=')
				91	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				92	plist.append(f.strip())
				93	s = s[end:]
				94	return plist
				95
				96
				97	def _unquotevalue(value):
				98	# This is different than utils.collapse_rfc2231_value() because it doesn't
				99	# try to convert the value to a unicode. Message.get_param() and
				100	# Message.get_params() are both currently defined to return the tuple in
				101	# the face of RFC 2231 parameters.
				102	if isinstance(value, tuple):
				103	return value[0], value[1], utils.unquote(value[2])
				104	else:
				105	return utils.unquote(value)
				106
				107
				108
				109	class Message:
				110	"""Basic message object.
				111
				112	A message object is defined as something that has a bunch of RFC 2822
				113	headers and a payload. It may optionally have an envelope header
				114	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				115	multipart or a message/rfc822), then the payload is a list of Message
				116	objects, otherwise it is a string.
				117
				118	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	119	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	120	do in fact appear multiple times (e.g. Received) and for those headers,
				121	you must use the explicit API to set or get all the headers. Not all of
				122	the mapping methods are implemented.
				123	"""
				124	def __init__(self):
				125	self._headers = []
				126	self._unixfrom = None
				127	self._payload = None
				128	self._charset = None
				129	# Defaults for multipart messages
				130	self.preamble = self.epilogue = None
				131	self.defects = []
				132	# Default content type
				133	self._default_type = 'text/plain'
				134
				135	def __str__(self):
				136	"""Return the entire formatted message as a string.
				137	This includes the headers, body, and envelope header.
				138	"""
				139	return self.as_string()
				140
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Flatten the whole message into a single string and return it.

        Optional `unixfrom' is passed on to Generator.flatten(); when True,
        it means include the Unix From_ envelope header.  Optional
        `maxheaderlen' is handed to the Generator constructor.

        This is a convenience method and may not generate the message
        exactly as you intend.  The Generator used here is created with
        mangle_from_=False; for more flexibility, use the flatten() method
        of a Generator instance you configure yourself.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        buffer = StringIO()
        Generator(buffer, mangle_from_=False,
                  maxheaderlen=maxheaderlen).flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()
				156
				157	def is_multipart(self):
				158	"""Return True if the message consists of multiple parts."""
				159	return isinstance(self._payload, list)
				160
				161	#
				162	# Unix From_ line
				163	#
				164	def set_unixfrom(self, unixfrom):
				165	self._unixfrom = unixfrom
				166
				167	def get_unixfrom(self):
				168	return self._unixfrom
				169
				170	#
				171	# Payload manipulation.
				172	#
				173	def attach(self, payload):
				174	"""Add the given payload to the current payload.
				175
				176	The current payload will always be a list of objects after this method
				177	is called. If you want to set the payload to a scalar object, use
				178	set_payload() instead.
				179	"""
				180	if self._payload is None:
				181	self._payload = [payload]
				182	else:
				183	self._payload.append(payload)
				184
				185	def get_payload(self, i=None, decode=False):
				186	"""Return a reference to the payload.
				187
				188	The payload will either be a list object or a string. If you mutate
				189	the list object, you modify the message's payload in place. Optional
				190	i returns that index into the payload.
				191
				192	Optional decode is a flag indicating whether the payload should be
				193	decoded or not, according to the Content-Transfer-Encoding header
				194	(default is False).
				195
				196	When True and the message is not a multipart, the payload will be
				197	decoded if this header's value is `quoted-printable' or `base64'. If
				198	some other encoding is used, or the header is missing, or if the
				199	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				200	payload is returned as-is.
				201
				202	If the message is a multipart and the decode flag is True, then None
				203	is returned.
				204	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	205	# Here is the logic table for this code, based on the email5.0.0 code:
				206	# i decode is_multipart result
				207	# ------ ------ ------------ ------------------------------
				208	# None True True None
				209	# i True True None
				210	# None False True _payload (a list)
				211	# i False True _payload element i (a Message)
				212	# i False False error (not a list)
				213	# i True False error (not a list)
				214	# None False False _payload
				215	# None True False _payload decoded (bytes)
				216	# Note that Barry planned to factor out the 'decode' case, but that
				217	# isn't so easy now that we handle the 8 bit data, which needs to be
				218	# converted in both the decode and non-decode path.
				219	if self.is_multipart():
				220	if decode:
				221	return None
				222	if i is None:
				223	return self._payload
				224	else:
				225	return self._payload[i]
				226	# For backward compatibility, Use isinstance and this error message
				227	# instead of the more logical is_multipart test.
				228	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	229	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	230	payload = self._payload
				231	cte = self.get('content-transfer-encoding', '').lower()
				232	# payload can be bytes here, (I wonder if that is actually a bug?)
				233	if isinstance(payload, str):
				234	if _has_surrogates(payload):
				235	bpayload = payload.encode('ascii', 'surrogateescape')
				236	if not decode:
				237	try:
				238	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				239	except LookupError:
				240	payload = bpayload.decode('ascii', 'replace')
				241	elif decode:
				242	try:
				243	bpayload = payload.encode('ascii')
				244	except UnicodeError:
				245	# This won't happen for RFC compliant messages (messages
				246	# containing only ASCII codepoints in the unicode input).
				247	# If it does happen, turn the string into bytes in a way
				248	# guaranteed not to fail.
				249	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	250	if not decode:
				251	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	252	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	253	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	254	elif cte == 'base64':
				255	try:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	256	return base64.b64decode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	257	except binascii.Error:
				258	# Incorrect padding
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	259	return bpayload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	260	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	261	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	262	out_file = BytesIO()
				263	try:
				264	uu.decode(in_file, out_file, quiet=True)
				265	return out_file.getvalue()
				266	except uu.Error:
				267	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	268	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	269	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	270	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	271	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	272
				273	def set_payload(self, payload, charset=None):
				274	"""Set the payload to the given value.
				275
				276	Optional charset sets the message's default character set. See
				277	set_charset() for details.
				278	"""
				279	self._payload = payload
				280	if charset is not None:
				281	self.set_charset(charset)
				282
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            # Forget the charset entirely: drop the header parameter too.
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add whichever MIME headers are still missing; existing ones are
        # kept, and only the charset parameter of Content-Type is updated.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # NOTE(review): presumably this detects an input charset that
        # differs from the output charset -- confirm against Charset's
        # comparison semantics.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # The body encoding is first tried as a callable applied to this
            # message.  If calling it raises TypeError (NOTE(review):
            # presumably because it is a string naming the encoding --
            # confirm against Charset.get_body_encoding), the payload is
            # body-encoded here and the value is recorded in the header.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)
				320
				321	def get_charset(self):
				322	"""Return the Charset instance associated with the message's payload.
				323	"""
				324	return self._charset
				325
				326	#
				327	# MAPPING INTERFACE (partial)
				328	#
				329	def __len__(self):
				330	"""Return the total number of headers, including duplicates."""
				331	return len(self._headers)
				332
				333	def __getitem__(self, name):
				334	"""Get a header value.
				335
				336	Return None if the header is missing instead of raising an exception.
				337
				338	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	339	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	340	the values matching a header field name.
				341	"""
				342	return self.get(name)
				343
				344	def __setitem__(self, name, val):
				345	"""Set the value of a header.
				346
				347	Note: this does not overwrite an existing header with the same field
				348	name. Use __delitem__() first to delete any existing headers.
				349	"""
				350	self._headers.append((name, val))
				351
				352	def __delitem__(self, name):
				353	"""Delete all occurrences of a header, if present.
				354
				355	Does not raise an exception if the header is missing.
				356	"""
				357	name = name.lower()
				358	newheaders = []
				359	for k, v in self._headers:
				360	if k.lower() != name:
				361	newheaders.append((k, v))
				362	self._headers = newheaders
				363
				364	def __contains__(self, name):
				365	return name.lower() in [k.lower() for k, v in self._headers]
				366
				367	def __iter__(self):
				368	for field, value in self._headers:
				369	yield field
				370
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	371	def keys(self):
				372	"""Return a list of all the message's header field names.
				373
				374	These will be sorted in the order they appeared in the original
				375	message, or were added to the message, and may contain duplicates.
				376	Any fields deleted and re-inserted are always appended to the header
				377	list.
				378	"""
				379	return [k for k, v in self._headers]
				380
    def values(self):
        """Return a list of all the message's header values.

        The values come out in stored order -- the order they appeared in
        the original message or were added to it -- and may contain
        duplicates.  Any fields deleted and re-inserted are always appended
        to the header list.  Each value is passed through
        _sanitize_surrogates() on the way out.
        """
        return [_sanitize_surrogates(val) for _, val in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	390
    def items(self):
        """Return all header fields and values as a list of 2-tuples.

        The pairs come out in stored order -- the order they appeared in
        the original message or were added to it -- and may contain
        duplicates.  Any fields deleted and re-inserted are always appended
        to the header list.  Each value is passed through
        _sanitize_surrogates() on the way out.
        """
        return [(name, _sanitize_surrogates(val))
                for name, val in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	400
    def get(self, name, failobj=None):
        """Return the value of the first header matching name.

        Field names are compared case-insensitively.  Like __getitem__(),
        but failobj is returned instead of None when the field is missing.
        """
        wanted = name.lower()
        matches = (val for field, val in self._headers
                   if field.lower() == wanted)
        for val in matches:
            # Only the first match is ever used.
            return _sanitize_surrogates(val)
        return failobj
				412
				413	#
				414	# Additional useful stuff
				415	#
				416
    def get_all(self, name, failobj=None):
        """Return a list of every value stored for the named field.

        Values are listed in stored order -- the order they appeared in the
        original message -- and may contain duplicates.  Any fields deleted
        and re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        wanted = name.lower()
        found = [_sanitize_surrogates(val) for field, val in self._headers
                 if field.lower() == wanted]
        if found:
            return found
        return failobj
				434
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value its main value (left
        out of the header text when None).  Keyword arguments become
        additional parameters of the header field, with underscores in
        their names converted to dashes.  A parameter is added as
        key="value" unless its value is None, in which case only the key is
        added.  All pieces are joined with '; '.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        """
        pieces = [] if _value is None else [_value]
        for key, val in _params.items():
            attr = key.replace('_', '-')
            pieces.append(attr if val is None else _formatparam(attr, val))
        self._headers.append((_name, SEMISPACE.join(pieces)))
				456
				457	def replace_header(self, _name, _value):
				458	"""Replace a header.
				459
				460	Replace the first matching header found in the message, retaining
				461	header order and case. If no matching header was found, a KeyError is
				462	raised.
				463	"""
				464	_name = _name.lower()
				465	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				466	if k.lower() == _name:
				467	self._headers[i] = (k, _value)
				468	break
				469	else:
				470	raise KeyError(_name)
				471
				472	#
				473	# Use these three methods instead of the three above.
				474	#
				475
    def get_content_type(self):
        """Return the message's content type.

        The result is the lower-cased part of the Content-Type header that
        precedes any parameters, of the form `maintype/subtype'.  If there
        is no Content-Type header, the default type as given by
        get_default_type() is returned.  If the header's type does not
        contain exactly one slash, `text/plain' is returned.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would
        be message/rfc822.
        """
        sentinel = object()
        raw = self.get('content-type', sentinel)
        if raw is sentinel:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(raw)[0].lower()
        # RFC 2045, section 5.2 says if it's invalid, use text/plain
        return ctype if ctype.count('/') == 1 else 'text/plain'
				499
				500	def get_content_maintype(self):
				501	"""Return the message's main content type.
				502
				503	This is the `maintype' part of the string returned by
				504	get_content_type().
				505	"""
				506	ctype = self.get_content_type()
				507	return ctype.split('/')[0]
				508
				509	def get_content_subtype(self):
				510	"""Returns the message's sub-content type.
				511
				512	This is the `subtype' part of the string returned by
				513	get_content_type().
				514	"""
				515	ctype = self.get_content_type()
				516	return ctype.split('/')[1]
				517
				518	def get_default_type(self):
				519	"""Return the `default' content type.
				520
				521	Most messages have a default content type of text/plain, except for
				522	messages that are subparts of multipart/digest containers. Such
				523	subparts have a default content type of message/rfc822.
				524	"""
				525	return self._default_type
				526
				527	def set_default_type(self, ctype):
				528	"""Set the `default' content type.
				529
				530	ctype should be either "text/plain" or "message/rfc822", although this
				531	is not enforced. The default content type is not stored in the
				532	Content-Type header.
				533	"""
				534	self._default_type = ctype
				535
    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  Returns
        # failobj when the header is missing, otherwise the list of
        # (name, value) pairs after utils.decode_params() has processed it.
        # BAW: should this be part of the public interface?
        sentinel = object()
        raw = self.get(header, sentinel)
        if raw is sentinel:
            return failobj
        pairs = []
        for chunk in _parseparam(';' + raw):
            key, equals, val = chunk.partition('=')
            if equals:
                pairs.append((key.strip(), val.strip()))
            else:
                # Must have been a bare attribute
                pairs.append((chunk.strip(), ''))
        return utils.decode_params(pairs)
				556
				557	def get_params(self, failobj=None, header='content-type', unquote=True):
				558	"""Return the message's Content-Type parameters, as a list.
				559
				560	The elements of the returned list are 2-tuples of key/value pairs, as
				561	split on the `=' sign. The left hand side of the `=' is the key,
				562	while the right hand side is the value. If there is no `=' sign in
				563	the parameter the value is the empty string. The value is as
				564	described in the get_param() method.
				565
				566	Optional failobj is the object to return if there is no Content-Type
				567	header. Optional header is the header to search instead of
				568	Content-Type. If unquote is True, the value is unquoted.
				569	"""
				570	missing = object()
				571	params = self._get_params_preserve(missing, header)
				572	if params is missing:
				573	return failobj
				574	if unquote:
				575	return [(k, _unquotevalue(v)) for k, v in params]
				576	else:
				577	return params
				578
				579	def get_param(self, param, failobj=None, header='content-type',
				580	unquote=True):
				581	"""Return the parameter value if found in the Content-Type header.
				582
				583	Optional failobj is the object to return if there is no Content-Type
				584	header, or the Content-Type header has no such parameter. Optional
				585	header is the header to search instead of Content-Type.
				586
				587	Parameter keys are always compared case insensitively. The return
				588	value can either be a string, or a 3-tuple if the parameter was RFC
				589	2231 encoded. When it's a 3-tuple, the elements of the value are of
				590	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				591	LANGUAGE can be None, in which case you should consider VALUE to be
				592	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				593
				594	Your application should be prepared to deal with 3-tuple return
				595	values, and can convert the parameter to a Unicode string like so:
				596
				597	param = msg.get_param('foo')
				598	if isinstance(param, tuple):
				599	param = unicode(param[2], param[0] or 'us-ascii')
				600
				601	In any case, the parameter value (either the returned string, or the
				602	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				603	to False.
				604	"""
				605	if header not in self:
				606	return failobj
				607	for k, v in self._get_params_preserve(failobj, header):
				608	if k.lower() == param.lower():
				609	if unquote:
				610	return _unquotevalue(v)
				611	else:
				612	return v
				613	return failobj
				614
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should be
        strings.
        """
        # A plain value plus a charset becomes an RFC 2231 triple, which
        # _formatparam() knows how to encode.
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # Note the truthiness test: a parameter that exists but has an empty
        # value is treated like a missing one and takes the append branch.
        if not self.get_param(param, header=header):
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the whole header value, substituting the new value for
            # the matching parameter and re-formatting all the others.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        # Only touch the header list when the value really changed.  The
        # header is deleted and re-added, so it ends up at the end of the
        # header list.
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
				662
				663	def del_param(self, param, header='content-type', requote=True):
				664	"""Remove the given parameter completely from the Content-Type header.
				665
				666	The header will be re-written in place without the parameter or its
				667	value. All values will be quoted as necessary unless requote is
				668	False. Optional header specifies an alternative to the Content-Type
				669	header.
				670	"""
				671	if header not in self:
				672	return
				673	new_ctype = ''
				674	for p, v in self.get_params(header=header, unquote=requote):
				675	if p.lower() != param.lower():
				676	if not new_ctype:
				677	new_ctype = _formatparam(p, v, requote)
				678	else:
				679	new_ctype = SEMISPACE.join([new_ctype,
				680	_formatparam(p, v, requote)])
				681	if new_ctype != self.get(header):
				682	del self[header]
				683	self[header] = new_ctype
				684
				685	def set_type(self, type, header='Content-Type', requote=True):
				686	"""Set the main type and subtype for the Content-Type header.
				687
				688	type must be a string in the form "maintype/subtype", otherwise a
				689	ValueError is raised.
				690
				691	This method replaces the Content-Type header, keeping all the
				692	parameters in place. If requote is False, this leaves the existing
				693	header's quoting as is. Otherwise, the parameters will be quoted (the
				694	default).
				695
				696	An alternative header can be specified in the header argument. When
				697	the Content-Type header is set, we'll always also add a MIME-Version
				698	header.
				699	"""
				700	# BAW: should we be strict?
				701	if not type.count('/') == 1:
				702	raise ValueError
				703	# Set the Content-Type, you get a MIME-Version
				704	if header.lower() == 'content-type':
				705	del self['mime-version']
				706	self['MIME-Version'] = '1.0'
				707	if header not in self:
				708	self[header] = type
				709	return
				710	params = self.get_params(header=header, unquote=requote)
				711	del self[header]
				712	self[header] = type
				713	# Skip the first param; it's the old type.
				714	for p, v in params[1:]:
				715	self.set_param(p, v, header, requote)
				716
				717	def get_filename(self, failobj=None):
				718	"""Return the filename associated with the payload if present.
				719
				720	The filename is extracted from the Content-Disposition header's
				721	`filename' parameter, and it is unquoted. If that header is missing
				722	the `filename' parameter, this method falls back to looking for the
				723	`name' parameter.
				724	"""
				725	missing = object()
				726	filename = self.get_param('filename', missing, 'content-disposition')
				727	if filename is missing:
R. David Murray	bf2e0aa	2009-10-10 00:13:32 +0000	[diff] [blame]	728	filename = self.get_param('name', missing, 'content-type')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	729	if filename is missing:
				730	return failobj
				731	return utils.collapse_rfc2231_value(filename).strip()
				732
				733	def get_boundary(self, failobj=None):
				734	"""Return the boundary associated with the payload if present.
				735
				736	The boundary is extracted from the Content-Type header's `boundary'
				737	parameter, and it is unquoted.
				738	"""
				739	missing = object()
				740	boundary = self.get_param('boundary', missing)
				741	if boundary is missing:
				742	return failobj
				743	# RFC 2046 says that boundaries may begin but not end in w/s
				744	return utils.collapse_rfc2231_value(boundary).rstrip()
				745
				746	def set_boundary(self, boundary):
				747	"""Set the boundary parameter in Content-Type to 'boundary'.
				748
				749	This is subtly different than deleting the Content-Type header and
				750	adding a new one with a new boundary parameter via add_header(). The
				751	main difference is that using the set_boundary() method preserves the
				752	order of the Content-Type header in the original message.
				753
				754	HeaderParseError is raised if the message has no Content-Type header.
				755	"""
				756	missing = object()
				757	params = self._get_params_preserve(missing, 'content-type')
				758	if params is missing:
				759	# There was no Content-Type header, and we don't know what type
				760	# to set it to, so raise an exception.
				761	raise errors.HeaderParseError('No Content-Type header found')
				762	newparams = []
				763	foundp = False
				764	for pk, pv in params:
				765	if pk.lower() == 'boundary':
				766	newparams.append(('boundary', '"%s"' % boundary))
				767	foundp = True
				768	else:
				769	newparams.append((pk, pv))
				770	if not foundp:
				771	# The original Content-Type header had no boundary attribute.
				772	# Tack one on the end. BAW: should we raise an exception
				773	# instead???
				774	newparams.append(('boundary', '"%s"' % boundary))
				775	# Replace the existing Content-Type header with the new value
				776	newheaders = []
				777	for h, v in self._headers:
				778	if h.lower() == 'content-type':
				779	parts = []
				780	for k, v in newparams:
				781	if v == '':
				782	parts.append(k)
				783	else:
				784	parts.append('%s=%s' % (k, v))
				785	newheaders.append((h, SEMISPACE.join(parts)))
				786
				787	else:
				788	newheaders.append((h, v))
				789	self._headers = newheaders
				790
				791	def get_content_charset(self, failobj=None):
				792	"""Return the charset parameter of the Content-Type header.
				793
				794	The returned string is always coerced to lower case. If there is no
				795	Content-Type header, or if that header has no charset parameter,
				796	failobj is returned.
				797	"""
				798	missing = object()
				799	charset = self.get_param('charset', missing)
				800	if charset is missing:
				801	return failobj
				802	if isinstance(charset, tuple):
				803	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				804	pcharset = charset[0] or 'us-ascii'
				805	try:
				806	# LookupError will be raised if the charset isn't known to
				807	# Python. UnicodeError will be raised if the encoded text
				808	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	809	as_bytes = charset[2].encode('raw-unicode-escape')
				810	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	811	except (LookupError, UnicodeError):
				812	charset = charset[2]
				813	# charset characters must be in us-ascii range
				814	try:
				815	charset.encode('us-ascii')
				816	except UnicodeError:
				817	return failobj
				818	# RFC 2046, $4.1.2 says charsets are not case sensitive
				819	return charset.lower()
				820
				821	def get_charsets(self, failobj=None):
				822	"""Return a list containing the charset(s) used in this message.
				823
				824	The returned list of items describes the Content-Type headers'
				825	charset parameter for this message and all the subparts in its
				826	payload.
				827
				828	Each item will either be a string (the value of the charset parameter
				829	in the Content-Type header of that part) or the value of the
				830	'failobj' parameter (defaults to None), if the part does not have a
				831	main MIME type of "text", or the charset is not defined.
				832
				833	The list will contain one string for each part of the message, plus
				834	one for the container message (i.e. self), so that a non-multipart
				835	message will still return a list of length 1.
				836	"""
				837	return [part.get_content_charset(failobj) for part in self.walk()]
				838
				839	# I.e. def walk(self): ...
				840	from email.iterators import walk