Blame - Lib/email/message.py - platform/external/python/cpython3

blob: d30f109a5ba2f9b710ba45c286c702fd71448f4b [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
# Separator placed between a header's main value and each of its parameters
# when a header line is (re)assembled.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				26
				27
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	28	# Helper functions
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	29	def _splitparam(param):
				30	# Split header parameters. BAW: this may be too simple. It isn't
				31	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				32	# found in the wild. We may eventually need a full fledged parser
				33	# eventually.
				34	a, sep, b = param.partition(';')
				35	if not sep:
				36	return a.strip(), None
				37	return a.strip(), b.strip()
				38
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	39	def _formatparam(param, value=None, quote=True):
				40	"""Convenience function to format and return a key=value pair.
				41
R. David Murray	ccb9d05	2010-12-13 23:57:01 +0000	[diff] [blame]	42	This will quote the value if needed or if quote is true. If value is a
				43	three tuple (charset, language, value), it will be encoded according
				44	to RFC2231 rules. If it contains non-ascii characters it will likewise
				45	be encoded according to RFC2231 rules, using the utf-8 charset and
				46	a null language.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	47	"""
				48	if value is not None and len(value) > 0:
				49	# A tuple is used for RFC 2231 encoded parameter values where items
				50	# are (charset, language, value). charset is a string, not a Charset
				51	# instance.
				52	if isinstance(value, tuple):
				53	# Encode as per RFC 2231
				54	param += '*'
				55	value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray	ccb9d05	2010-12-13 23:57:01 +0000	[diff] [blame]	56	else:
				57	try:
				58	value.encode('ascii')
				59	except UnicodeEncodeError:
				60	param += '*'
				61	value = utils.encode_rfc2231(value, 'utf-8', '')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	62	# BAW: Please check this. I think that if quote is set it should
				63	# force quoting even if not necessary.
				64	if quote or tspecials.search(value):
				65	return '%s="%s"' % (param, utils.quote(value))
				66	else:
				67	return '%s=%s' % (param, value)
				68	else:
				69	return param
				70
				71	def _parseparam(s):
				72	plist = []
				73	while s[:1] == ';':
				74	s = s[1:]
				75	end = s.find(';')
R. David Murray	84ee310	2010-04-14 19:05:38 +0000	[diff] [blame]	76	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	77	end = s.find(';', end + 1)
				78	if end < 0:
				79	end = len(s)
				80	f = s[:end]
				81	if '=' in f:
				82	i = f.index('=')
				83	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				84	plist.append(f.strip())
				85	s = s[end:]
				86	return plist
				87
				88
				89	def _unquotevalue(value):
				90	# This is different than utils.collapse_rfc2231_value() because it doesn't
				91	# try to convert the value to a unicode. Message.get_param() and
				92	# Message.get_params() are both currently defined to return the tuple in
				93	# the face of RFC 2231 parameters.
				94	if isinstance(value, tuple):
				95	return value[0], value[1], utils.unquote(value[2])
				96	else:
				97	return utils.unquote(value)
				98
				99
				100
				101	class Message:
				102	"""Basic message object.
				103
				104	A message object is defined as something that has a bunch of RFC 2822
				105	headers and a payload. It may optionally have an envelope header
				106	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				107	multipart or a message/rfc822), then the payload is a list of Message
				108	objects, otherwise it is a string.
				109
				110	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	c1b3ed5	2010-12-06 18:39:32 +0000	[diff] [blame]	111	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	112	do in fact appear multiple times (e.g. Received) and for those headers,
				113	you must use the explicit API to set or get all the headers. Not all of
				114	the mapping methods are implemented.
				115	"""
				116	def __init__(self):
				117	self._headers = []
				118	self._unixfrom = None
				119	self._payload = None
				120	self._charset = None
				121	# Defaults for multipart messages
				122	self.preamble = self.epilogue = None
				123	self.defects = []
				124	# Default content type
				125	self._default_type = 'text/plain'
				126
				127	def __str__(self):
				128	"""Return the entire formatted message as a string.
				129	This includes the headers, body, and envelope header.
				130	"""
				131	return self.as_string()
				132
				133	def as_string(self, unixfrom=False, maxheaderlen=0):
				134	"""Return the entire formatted message as a string.
				135	Optional `unixfrom' when True, means include the Unix From_ envelope
				136	header.
				137
				138	This is a convenience method and may not generate the message exactly
R David Murray	7dedcb4	2011-03-15 14:01:18 -0400	[diff] [blame^]	139	as you intend. For more flexibility, use the flatten() method of a
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	140	Generator instance.
				141	"""
				142	from email.generator import Generator
				143	fp = StringIO()
				144	g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
				145	g.flatten(self, unixfrom=unixfrom)
				146	return fp.getvalue()
				147
				148	def is_multipart(self):
				149	"""Return True if the message consists of multiple parts."""
				150	return isinstance(self._payload, list)
				151
				152	#
				153	# Unix From_ line
				154	#
				155	def set_unixfrom(self, unixfrom):
				156	self._unixfrom = unixfrom
				157
				158	def get_unixfrom(self):
				159	return self._unixfrom
				160
				161	#
				162	# Payload manipulation.
				163	#
				164	def attach(self, payload):
				165	"""Add the given payload to the current payload.
				166
				167	The current payload will always be a list of objects after this method
				168	is called. If you want to set the payload to a scalar object, use
				169	set_payload() instead.
				170	"""
				171	if self._payload is None:
				172	self._payload = [payload]
				173	else:
				174	self._payload.append(payload)
				175
				176	def get_payload(self, i=None, decode=False):
				177	"""Return a reference to the payload.
				178
				179	The payload will either be a list object or a string. If you mutate
				180	the list object, you modify the message's payload in place. Optional
				181	i returns that index into the payload.
				182
				183	Optional decode is a flag indicating whether the payload should be
				184	decoded or not, according to the Content-Transfer-Encoding header
				185	(default is False).
				186
				187	When True and the message is not a multipart, the payload will be
				188	decoded if this header's value is `quoted-printable' or `base64'. If
				189	some other encoding is used, or the header is missing, or if the
				190	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				191	payload is returned as-is.
				192
				193	If the message is a multipart and the decode flag is True, then None
				194	is returned.
				195	"""
				196	if i is None:
				197	payload = self._payload
				198	elif not isinstance(self._payload, list):
				199	raise TypeError('Expected list, got %s' % type(self._payload))
				200	else:
				201	payload = self._payload[i]
				202	if not decode:
				203	return payload
				204	# Decoded payloads always return bytes. XXX split this part out into
				205	# a new method called .get_decoded_payload().
				206	if self.is_multipart():
				207	return None
				208	cte = self.get('content-transfer-encoding', '').lower()
				209	if cte == 'quoted-printable':
				210	return utils._qdecode(payload)
				211	elif cte == 'base64':
				212	try:
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	213	if isinstance(payload, str):
				214	payload = payload.encode('raw-unicode-escape')
				215	return base64.b64decode(payload)
				216	#return utils._bdecode(payload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	217	except binascii.Error:
				218	# Incorrect padding
				219	pass
				220	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	221	in_file = BytesIO(payload.encode('raw-unicode-escape'))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	222	out_file = BytesIO()
				223	try:
				224	uu.decode(in_file, out_file, quiet=True)
				225	return out_file.getvalue()
				226	except uu.Error:
				227	# Some decoding problem
				228	pass
				229	# Is there a better way to do this? We can't use the bytes
				230	# constructor.
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	231	if isinstance(payload, str):
				232	return payload.encode('raw-unicode-escape')
				233	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	234
				235	def set_payload(self, payload, charset=None):
				236	"""Set the payload to the given value.
				237
				238	Optional charset sets the message's default character set. See
				239	set_charset() for details.
				240	"""
				241	self._payload = payload
				242	if charset is not None:
				243	self.set_charset(charset)
				244
				245	def set_charset(self, charset):
				246	"""Set the charset of the payload to a given character set.
				247
				248	charset can be a Charset instance, a string naming a character set, or
				249	None. If it is a string it will be converted to a Charset instance.
				250	If charset is None, the charset parameter will be removed from the
				251	Content-Type field. Anything else will generate a TypeError.
				252
				253	The message will be assumed to be of type text/* encoded with
				254	charset.input_charset. It will be converted to charset.output_charset
				255	and encoded properly, if needed, when generating the plain text
				256	representation of the message. MIME headers (MIME-Version,
				257	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	258	"""
				259	if charset is None:
				260	self.del_param('charset')
				261	self._charset = None
				262	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	263	if not isinstance(charset, Charset):
				264	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	265	self._charset = charset
				266	if 'MIME-Version' not in self:
				267	self.add_header('MIME-Version', '1.0')
				268	if 'Content-Type' not in self:
				269	self.add_header('Content-Type', 'text/plain',
				270	charset=charset.get_output_charset())
				271	else:
				272	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	273	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	274	self._payload = charset.body_encode(self._payload)
				275	if 'Content-Transfer-Encoding' not in self:
				276	cte = charset.get_body_encoding()
				277	try:
				278	cte(self)
				279	except TypeError:
				280	self._payload = charset.body_encode(self._payload)
				281	self.add_header('Content-Transfer-Encoding', cte)
				282
				283	def get_charset(self):
				284	"""Return the Charset instance associated with the message's payload.
				285	"""
				286	return self._charset
				287
				288	#
				289	# MAPPING INTERFACE (partial)
				290	#
				291	def __len__(self):
				292	"""Return the total number of headers, including duplicates."""
				293	return len(self._headers)
				294
				295	def __getitem__(self, name):
				296	"""Get a header value.
				297
				298	Return None if the header is missing instead of raising an exception.
				299
				300	Note that if the header appeared multiple times, exactly which
R. David Murray	c1b3ed5	2010-12-06 18:39:32 +0000	[diff] [blame]	301	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	302	the values matching a header field name.
				303	"""
				304	return self.get(name)
				305
				306	def __setitem__(self, name, val):
				307	"""Set the value of a header.
				308
				309	Note: this does not overwrite an existing header with the same field
				310	name. Use __delitem__() first to delete any existing headers.
				311	"""
				312	self._headers.append((name, val))
				313
				314	def __delitem__(self, name):
				315	"""Delete all occurrences of a header, if present.
				316
				317	Does not raise an exception if the header is missing.
				318	"""
				319	name = name.lower()
				320	newheaders = []
				321	for k, v in self._headers:
				322	if k.lower() != name:
				323	newheaders.append((k, v))
				324	self._headers = newheaders
				325
				326	def __contains__(self, name):
				327	return name.lower() in [k.lower() for k, v in self._headers]
				328
				329	def __iter__(self):
				330	for field, value in self._headers:
				331	yield field
				332
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	333	def keys(self):
				334	"""Return a list of all the message's header field names.
				335
				336	These will be sorted in the order they appeared in the original
				337	message, or were added to the message, and may contain duplicates.
				338	Any fields deleted and re-inserted are always appended to the header
				339	list.
				340	"""
				341	return [k for k, v in self._headers]
				342
				343	def values(self):
				344	"""Return a list of all the message's header values.
				345
				346	These will be sorted in the order they appeared in the original
				347	message, or were added to the message, and may contain duplicates.
				348	Any fields deleted and re-inserted are always appended to the header
				349	list.
				350	"""
				351	return [v for k, v in self._headers]
				352
				353	def items(self):
				354	"""Get all the message's header fields and values.
				355
				356	These will be sorted in the order they appeared in the original
				357	message, or were added to the message, and may contain duplicates.
				358	Any fields deleted and re-inserted are always appended to the header
				359	list.
				360	"""
				361	return self._headers[:]
				362
				363	def get(self, name, failobj=None):
				364	"""Get a header value.
				365
				366	Like __getitem__() but return failobj instead of None when the field
				367	is missing.
				368	"""
				369	name = name.lower()
				370	for k, v in self._headers:
				371	if k.lower() == name:
				372	return v
				373	return failobj
				374
				375	#
				376	# Additional useful stuff
				377	#
				378
				379	def get_all(self, name, failobj=None):
				380	"""Return a list of all the values for the named field.
				381
				382	These will be sorted in the order they appeared in the original
				383	message, and may contain duplicates. Any fields deleted and
				384	re-inserted are always appended to the header list.
				385
				386	If no such fields exist, failobj is returned (defaults to None).
				387	"""
				388	values = []
				389	name = name.lower()
				390	for k, v in self._headers:
				391	if k.lower() == name:
				392	values.append(v)
				393	if not values:
				394	return failobj
				395	return values
				396
				397	def add_header(self, _name, _value, **_params):
				398	"""Extended header setting.
				399
				400	name is the header field to add. keyword arguments can be used to set
				401	additional parameters for the header field, with underscores converted
				402	to dashes. Normally the parameter will be added as key="value" unless
R. David Murray	ccb9d05	2010-12-13 23:57:01 +0000	[diff] [blame]	403	value is None, in which case only the key will be added. If a
				404	parameter value contains non-ASCII characters it can be specified as a
				405	three-tuple of (charset, language, value), in which case it will be
				406	encoded according to RFC2231 rules. Otherwise it will be encoded using
				407	the utf-8 charset and a language of ''.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	408
R. David Murray	ccb9d05	2010-12-13 23:57:01 +0000	[diff] [blame]	409	Examples:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	410
				411	msg.add_header('content-disposition', 'attachment', filename='bud.gif')
R. David Murray	ccb9d05	2010-12-13 23:57:01 +0000	[diff] [blame]	412	msg.add_header('content-disposition', 'attachment',
				413	filename=('utf-8', '', Fußballer.ppt'))
				414	msg.add_header('content-disposition', 'attachment',
				415	filename='Fußballer.ppt'))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	416	"""
				417	parts = []
				418	for k, v in _params.items():
				419	if v is None:
				420	parts.append(k.replace('_', '-'))
				421	else:
				422	parts.append(_formatparam(k.replace('_', '-'), v))
				423	if _value is not None:
				424	parts.insert(0, _value)
				425	self._headers.append((_name, SEMISPACE.join(parts)))
				426
				427	def replace_header(self, _name, _value):
				428	"""Replace a header.
				429
				430	Replace the first matching header found in the message, retaining
				431	header order and case. If no matching header was found, a KeyError is
				432	raised.
				433	"""
				434	_name = _name.lower()
				435	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				436	if k.lower() == _name:
				437	self._headers[i] = (k, _value)
				438	break
				439	else:
				440	raise KeyError(_name)
				441
				442	#
				443	# Use these three methods instead of the three above.
				444	#
				445
				446	def get_content_type(self):
				447	"""Return the message's content type.
				448
				449	The returned string is coerced to lower case of the form
				450	`maintype/subtype'. If there was no Content-Type header in the
				451	message, the default type as given by get_default_type() will be
				452	returned. Since according to RFC 2045, messages always have a default
				453	type this will always return a value.
				454
				455	RFC 2045 defines a message's default type to be text/plain unless it
				456	appears inside a multipart/digest container, in which case it would be
				457	message/rfc822.
				458	"""
				459	missing = object()
				460	value = self.get('content-type', missing)
				461	if value is missing:
				462	# This should have no parameters
				463	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	464	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	465	# RFC 2045, section 5.2 says if its invalid, use text/plain
				466	if ctype.count('/') != 1:
				467	return 'text/plain'
				468	return ctype
				469
				470	def get_content_maintype(self):
				471	"""Return the message's main content type.
				472
				473	This is the `maintype' part of the string returned by
				474	get_content_type().
				475	"""
				476	ctype = self.get_content_type()
				477	return ctype.split('/')[0]
				478
				479	def get_content_subtype(self):
				480	"""Returns the message's sub-content type.
				481
				482	This is the `subtype' part of the string returned by
				483	get_content_type().
				484	"""
				485	ctype = self.get_content_type()
				486	return ctype.split('/')[1]
				487
				488	def get_default_type(self):
				489	"""Return the `default' content type.
				490
				491	Most messages have a default content type of text/plain, except for
				492	messages that are subparts of multipart/digest containers. Such
				493	subparts have a default content type of message/rfc822.
				494	"""
				495	return self._default_type
				496
				497	def set_default_type(self, ctype):
				498	"""Set the `default' content type.
				499
				500	ctype should be either "text/plain" or "message/rfc822", although this
				501	is not enforced. The default content type is not stored in the
				502	Content-Type header.
				503	"""
				504	self._default_type = ctype
				505
				506	def _get_params_preserve(self, failobj, header):
				507	# Like get_params() but preserves the quoting of values. BAW:
				508	# should this be part of the public interface?
				509	missing = object()
				510	value = self.get(header, missing)
				511	if value is missing:
				512	return failobj
				513	params = []
				514	for p in _parseparam(';' + value):
				515	try:
				516	name, val = p.split('=', 1)
				517	name = name.strip()
				518	val = val.strip()
				519	except ValueError:
				520	# Must have been a bare attribute
				521	name = p.strip()
				522	val = ''
				523	params.append((name, val))
				524	params = utils.decode_params(params)
				525	return params
				526
				527	def get_params(self, failobj=None, header='content-type', unquote=True):
				528	"""Return the message's Content-Type parameters, as a list.
				529
				530	The elements of the returned list are 2-tuples of key/value pairs, as
				531	split on the `=' sign. The left hand side of the `=' is the key,
				532	while the right hand side is the value. If there is no `=' sign in
				533	the parameter the value is the empty string. The value is as
				534	described in the get_param() method.
				535
				536	Optional failobj is the object to return if there is no Content-Type
				537	header. Optional header is the header to search instead of
				538	Content-Type. If unquote is True, the value is unquoted.
				539	"""
				540	missing = object()
				541	params = self._get_params_preserve(missing, header)
				542	if params is missing:
				543	return failobj
				544	if unquote:
				545	return [(k, _unquotevalue(v)) for k, v in params]
				546	else:
				547	return params
				548
				549	def get_param(self, param, failobj=None, header='content-type',
				550	unquote=True):
				551	"""Return the parameter value if found in the Content-Type header.
				552
				553	Optional failobj is the object to return if there is no Content-Type
				554	header, or the Content-Type header has no such parameter. Optional
				555	header is the header to search instead of Content-Type.
				556
				557	Parameter keys are always compared case insensitively. The return
				558	value can either be a string, or a 3-tuple if the parameter was RFC
				559	2231 encoded. When it's a 3-tuple, the elements of the value are of
				560	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				561	LANGUAGE can be None, in which case you should consider VALUE to be
				562	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				563
				564	Your application should be prepared to deal with 3-tuple return
				565	values, and can convert the parameter to a Unicode string like so:
				566
				567	param = msg.get_param('foo')
				568	if isinstance(param, tuple):
				569	param = unicode(param[2], param[0] or 'us-ascii')
				570
				571	In any case, the parameter value (either the returned string, or the
				572	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				573	to False.
				574	"""
				575	if header not in self:
				576	return failobj
				577	for k, v in self._get_params_preserve(failobj, header):
				578	if k.lower() == param.lower():
				579	if unquote:
				580	return _unquotevalue(v)
				581	else:
				582	return v
				583	return failobj
				584
				585	def set_param(self, param, value, header='Content-Type', requote=True,
				586	charset=None, language=''):
				587	"""Set a parameter in the Content-Type header.
				588
				589	If the parameter already exists in the header, its value will be
				590	replaced with the new value.
				591
				592	If header is Content-Type and has not yet been defined for this
				593	message, it will be set to "text/plain" and the new parameter and
				594	value will be appended as per RFC 2045.
				595
				596	An alternate header can specified in the header argument, and all
				597	parameters will be quoted as necessary unless requote is False.
				598
				599	If charset is specified, the parameter will be encoded according to RFC
				600	2231. Optional language specifies the RFC 2231 language, defaulting
				601	to the empty string. Both charset and language should be strings.
				602	"""
				603	if not isinstance(value, tuple) and charset:
				604	value = (charset, language, value)
				605
				606	if header not in self and header.lower() == 'content-type':
				607	ctype = 'text/plain'
				608	else:
				609	ctype = self.get(header)
				610	if not self.get_param(param, header=header):
				611	if not ctype:
				612	ctype = _formatparam(param, value, requote)
				613	else:
				614	ctype = SEMISPACE.join(
				615	[ctype, _formatparam(param, value, requote)])
				616	else:
				617	ctype = ''
				618	for old_param, old_value in self.get_params(header=header,
				619	unquote=requote):
				620	append_param = ''
				621	if old_param.lower() == param.lower():
				622	append_param = _formatparam(param, value, requote)
				623	else:
				624	append_param = _formatparam(old_param, old_value, requote)
				625	if not ctype:
				626	ctype = append_param
				627	else:
				628	ctype = SEMISPACE.join([ctype, append_param])
				629	if ctype != self.get(header):
				630	del self[header]
				631	self[header] = ctype
				632
				633	def del_param(self, param, header='content-type', requote=True):
				634	"""Remove the given parameter completely from the Content-Type header.
				635
				636	The header will be re-written in place without the parameter or its
				637	value. All values will be quoted as necessary unless requote is
				638	False. Optional header specifies an alternative to the Content-Type
				639	header.
				640	"""
				641	if header not in self:
				642	return
				643	new_ctype = ''
				644	for p, v in self.get_params(header=header, unquote=requote):
				645	if p.lower() != param.lower():
				646	if not new_ctype:
				647	new_ctype = _formatparam(p, v, requote)
				648	else:
				649	new_ctype = SEMISPACE.join([new_ctype,
				650	_formatparam(p, v, requote)])
				651	if new_ctype != self.get(header):
				652	del self[header]
				653	self[header] = new_ctype
				654
				655	def set_type(self, type, header='Content-Type', requote=True):
				656	"""Set the main type and subtype for the Content-Type header.
				657
				658	type must be a string in the form "maintype/subtype", otherwise a
				659	ValueError is raised.
				660
				661	This method replaces the Content-Type header, keeping all the
				662	parameters in place. If requote is False, this leaves the existing
				663	header's quoting as is. Otherwise, the parameters will be quoted (the
				664	default).
				665
				666	An alternative header can be specified in the header argument. When
				667	the Content-Type header is set, we'll always also add a MIME-Version
				668	header.
				669	"""
				670	# BAW: should we be strict?
				671	if not type.count('/') == 1:
				672	raise ValueError
				673	# Set the Content-Type, you get a MIME-Version
				674	if header.lower() == 'content-type':
				675	del self['mime-version']
				676	self['MIME-Version'] = '1.0'
				677	if header not in self:
				678	self[header] = type
				679	return
				680	params = self.get_params(header=header, unquote=requote)
				681	del self[header]
				682	self[header] = type
				683	# Skip the first param; it's the old type.
				684	for p, v in params[1:]:
				685	self.set_param(p, v, header, requote)
				686
				687	def get_filename(self, failobj=None):
				688	"""Return the filename associated with the payload if present.
				689
				690	The filename is extracted from the Content-Disposition header's
				691	`filename' parameter, and it is unquoted. If that header is missing
				692	the `filename' parameter, this method falls back to looking for the
				693	`name' parameter.
				694	"""
				695	missing = object()
				696	filename = self.get_param('filename', missing, 'content-disposition')
				697	if filename is missing:
R. David Murray	290e939	2009-10-10 00:57:04 +0000	[diff] [blame]	698	filename = self.get_param('name', missing, 'content-type')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	699	if filename is missing:
				700	return failobj
				701	return utils.collapse_rfc2231_value(filename).strip()
				702
				703	def get_boundary(self, failobj=None):
				704	"""Return the boundary associated with the payload if present.
				705
				706	The boundary is extracted from the Content-Type header's `boundary'
				707	parameter, and it is unquoted.
				708	"""
				709	missing = object()
				710	boundary = self.get_param('boundary', missing)
				711	if boundary is missing:
				712	return failobj
				713	# RFC 2046 says that boundaries may begin but not end in w/s
				714	return utils.collapse_rfc2231_value(boundary).rstrip()
				715
				716	def set_boundary(self, boundary):
				717	"""Set the boundary parameter in Content-Type to 'boundary'.
				718
				719	This is subtly different than deleting the Content-Type header and
				720	adding a new one with a new boundary parameter via add_header(). The
				721	main difference is that using the set_boundary() method preserves the
				722	order of the Content-Type header in the original message.
				723
				724	HeaderParseError is raised if the message has no Content-Type header.
				725	"""
				726	missing = object()
				727	params = self._get_params_preserve(missing, 'content-type')
				728	if params is missing:
				729	# There was no Content-Type header, and we don't know what type
				730	# to set it to, so raise an exception.
				731	raise errors.HeaderParseError('No Content-Type header found')
				732	newparams = []
				733	foundp = False
				734	for pk, pv in params:
				735	if pk.lower() == 'boundary':
				736	newparams.append(('boundary', '"%s"' % boundary))
				737	foundp = True
				738	else:
				739	newparams.append((pk, pv))
				740	if not foundp:
				741	# The original Content-Type header had no boundary attribute.
				742	# Tack one on the end. BAW: should we raise an exception
				743	# instead???
				744	newparams.append(('boundary', '"%s"' % boundary))
				745	# Replace the existing Content-Type header with the new value
				746	newheaders = []
				747	for h, v in self._headers:
				748	if h.lower() == 'content-type':
				749	parts = []
				750	for k, v in newparams:
				751	if v == '':
				752	parts.append(k)
				753	else:
				754	parts.append('%s=%s' % (k, v))
				755	newheaders.append((h, SEMISPACE.join(parts)))
				756
				757	else:
				758	newheaders.append((h, v))
				759	self._headers = newheaders
				760
				761	def get_content_charset(self, failobj=None):
				762	"""Return the charset parameter of the Content-Type header.
				763
				764	The returned string is always coerced to lower case. If there is no
				765	Content-Type header, or if that header has no charset parameter,
				766	failobj is returned.
				767	"""
				768	missing = object()
				769	charset = self.get_param('charset', missing)
				770	if charset is missing:
				771	return failobj
				772	if isinstance(charset, tuple):
				773	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				774	pcharset = charset[0] or 'us-ascii'
				775	try:
				776	# LookupError will be raised if the charset isn't known to
				777	# Python. UnicodeError will be raised if the encoded text
				778	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	779	as_bytes = charset[2].encode('raw-unicode-escape')
				780	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	781	except (LookupError, UnicodeError):
				782	charset = charset[2]
				783	# charset characters must be in us-ascii range
				784	try:
				785	charset.encode('us-ascii')
				786	except UnicodeError:
				787	return failobj
				788	# RFC 2046, $4.1.2 says charsets are not case sensitive
				789	return charset.lower()
				790
				791	def get_charsets(self, failobj=None):
				792	"""Return a list containing the charset(s) used in this message.
				793
				794	The returned list of items describes the Content-Type headers'
				795	charset parameter for this message and all the subparts in its
				796	payload.
				797
				798	Each item will either be a string (the value of the charset parameter
				799	in the Content-Type header of that part) or the value of the
				800	'failobj' parameter (defaults to None), if the part does not have a
				801	main MIME type of "text", or the charset is not defined.
				802
				803	The list will contain one string for each part of the message, plus
				804	one for the container message (i.e. self), so that a non-multipart
				805	message will still return a list of length 1.
				806	"""
				807	return [part.get_content_charset(failobj) for part in self.walk()]
				808
				809	# I.e. def walk(self): ...
				810	from email.iterators import walk