Blame - Lib/email/message.py - platform/external/python/cpython3

blob: 27a577dac70f5a93e6d4b5a554582496fe657966 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
# Separator placed between a header's main value and each of its parameters.
SEMISPACE = '; '

# Regular expression that matches the `special' characters in a parameter
# value; the presence of any one of them forces the value to be quoted.
tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				26
				27
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	28	# Helper functions
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	29	def _splitparam(param):
				30	# Split header parameters. BAW: this may be too simple. It isn't
				31	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				32	# found in the wild. We may eventually need a full fledged parser
				33	# eventually.
				34	a, sep, b = param.partition(';')
				35	if not sep:
				36	return a.strip(), None
				37	return a.strip(), b.strip()
				38
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	39	def _formatparam(param, value=None, quote=True):
				40	"""Convenience function to format and return a key=value pair.
				41
				42	This will quote the value if needed or if quote is true.
				43	"""
				44	if value is not None and len(value) > 0:
				45	# A tuple is used for RFC 2231 encoded parameter values where items
				46	# are (charset, language, value). charset is a string, not a Charset
				47	# instance.
				48	if isinstance(value, tuple):
				49	# Encode as per RFC 2231
				50	param += '*'
				51	value = utils.encode_rfc2231(value[2], value[0], value[1])
				52	# BAW: Please check this. I think that if quote is set it should
				53	# force quoting even if not necessary.
				54	if quote or tspecials.search(value):
				55	return '%s="%s"' % (param, utils.quote(value))
				56	else:
				57	return '%s=%s' % (param, value)
				58	else:
				59	return param
				60
				61	def _parseparam(s):
				62	plist = []
				63	while s[:1] == ';':
				64	s = s[1:]
				65	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	66	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	67	end = s.find(';', end + 1)
				68	if end < 0:
				69	end = len(s)
				70	f = s[:end]
				71	if '=' in f:
				72	i = f.index('=')
				73	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				74	plist.append(f.strip())
				75	s = s[end:]
				76	return plist
				77
				78
				79	def _unquotevalue(value):
				80	# This is different than utils.collapse_rfc2231_value() because it doesn't
				81	# try to convert the value to a unicode. Message.get_param() and
				82	# Message.get_params() are both currently defined to return the tuple in
				83	# the face of RFC 2231 parameters.
				84	if isinstance(value, tuple):
				85	return value[0], value[1], utils.unquote(value[2])
				86	else:
				87	return utils.unquote(value)
				88
				89
				90
				91	class Message:
				92	"""Basic message object.
				93
				94	A message object is defined as something that has a bunch of RFC 2822
				95	headers and a payload. It may optionally have an envelope header
				96	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				97	multipart or a message/rfc822), then the payload is a list of Message
				98	objects, otherwise it is a string.
				99
				100	Message objects implement part of the `mapping' interface, which assumes
				101	there is exactly one occurrance of the header per message. Some headers
				102	do in fact appear multiple times (e.g. Received) and for those headers,
				103	you must use the explicit API to set or get all the headers. Not all of
				104	the mapping methods are implemented.
				105	"""
				106	def __init__(self):
				107	self._headers = []
				108	self._unixfrom = None
				109	self._payload = None
				110	self._charset = None
				111	# Defaults for multipart messages
				112	self.preamble = self.epilogue = None
				113	self.defects = []
				114	# Default content type
				115	self._default_type = 'text/plain'
				116
				117	def __str__(self):
				118	"""Return the entire formatted message as a string.
				119	This includes the headers, body, and envelope header.
				120	"""
				121	return self.as_string()
				122
				123	def as_string(self, unixfrom=False, maxheaderlen=0):
				124	"""Return the entire formatted message as a string.
				125	Optional `unixfrom' when True, means include the Unix From_ envelope
				126	header.
				127
				128	This is a convenience method and may not generate the message exactly
				129	as you intend because by default it mangles lines that begin with
				130	"From ". For more flexibility, use the flatten() method of a
				131	Generator instance.
				132	"""
				133	from email.generator import Generator
				134	fp = StringIO()
				135	g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
				136	g.flatten(self, unixfrom=unixfrom)
				137	return fp.getvalue()
				138
				139	def is_multipart(self):
				140	"""Return True if the message consists of multiple parts."""
				141	return isinstance(self._payload, list)
				142
				143	#
				144	# Unix From_ line
				145	#
				146	def set_unixfrom(self, unixfrom):
				147	self._unixfrom = unixfrom
				148
				149	def get_unixfrom(self):
				150	return self._unixfrom
				151
				152	#
				153	# Payload manipulation.
				154	#
				155	def attach(self, payload):
				156	"""Add the given payload to the current payload.
				157
				158	The current payload will always be a list of objects after this method
				159	is called. If you want to set the payload to a scalar object, use
				160	set_payload() instead.
				161	"""
				162	if self._payload is None:
				163	self._payload = [payload]
				164	else:
				165	self._payload.append(payload)
				166
				167	def get_payload(self, i=None, decode=False):
				168	"""Return a reference to the payload.
				169
				170	The payload will either be a list object or a string. If you mutate
				171	the list object, you modify the message's payload in place. Optional
				172	i returns that index into the payload.
				173
				174	Optional decode is a flag indicating whether the payload should be
				175	decoded or not, according to the Content-Transfer-Encoding header
				176	(default is False).
				177
				178	When True and the message is not a multipart, the payload will be
				179	decoded if this header's value is `quoted-printable' or `base64'. If
				180	some other encoding is used, or the header is missing, or if the
				181	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				182	payload is returned as-is.
				183
				184	If the message is a multipart and the decode flag is True, then None
				185	is returned.
				186	"""
				187	if i is None:
				188	payload = self._payload
				189	elif not isinstance(self._payload, list):
				190	raise TypeError('Expected list, got %s' % type(self._payload))
				191	else:
				192	payload = self._payload[i]
				193	if not decode:
				194	return payload
				195	# Decoded payloads always return bytes. XXX split this part out into
				196	# a new method called .get_decoded_payload().
				197	if self.is_multipart():
				198	return None
				199	cte = self.get('content-transfer-encoding', '').lower()
				200	if cte == 'quoted-printable':
				201	return utils._qdecode(payload)
				202	elif cte == 'base64':
				203	try:
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	204	if isinstance(payload, str):
				205	payload = payload.encode('raw-unicode-escape')
				206	return base64.b64decode(payload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	207	except binascii.Error:
				208	# Incorrect padding
				209	pass
				210	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	211	in_file = BytesIO(payload.encode('raw-unicode-escape'))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	212	out_file = BytesIO()
				213	try:
				214	uu.decode(in_file, out_file, quiet=True)
				215	return out_file.getvalue()
				216	except uu.Error:
				217	# Some decoding problem
				218	pass
				219	# Is there a better way to do this? We can't use the bytes
				220	# constructor.
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	221	if isinstance(payload, str):
				222	return payload.encode('raw-unicode-escape')
				223	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	224
				225	def set_payload(self, payload, charset=None):
				226	"""Set the payload to the given value.
				227
				228	Optional charset sets the message's default character set. See
				229	set_charset() for details.
				230	"""
				231	self._payload = payload
				232	if charset is not None:
				233	self.set_charset(charset)
				234
				235	def set_charset(self, charset):
				236	"""Set the charset of the payload to a given character set.
				237
				238	charset can be a Charset instance, a string naming a character set, or
				239	None. If it is a string it will be converted to a Charset instance.
				240	If charset is None, the charset parameter will be removed from the
				241	Content-Type field. Anything else will generate a TypeError.
				242
				243	The message will be assumed to be of type text/* encoded with
				244	charset.input_charset. It will be converted to charset.output_charset
				245	and encoded properly, if needed, when generating the plain text
				246	representation of the message. MIME headers (MIME-Version,
				247	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	248	"""
				249	if charset is None:
				250	self.del_param('charset')
				251	self._charset = None
				252	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	253	if not isinstance(charset, Charset):
				254	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	255	self._charset = charset
				256	if 'MIME-Version' not in self:
				257	self.add_header('MIME-Version', '1.0')
				258	if 'Content-Type' not in self:
				259	self.add_header('Content-Type', 'text/plain',
				260	charset=charset.get_output_charset())
				261	else:
				262	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	263	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	264	self._payload = charset.body_encode(self._payload)
				265	if 'Content-Transfer-Encoding' not in self:
				266	cte = charset.get_body_encoding()
				267	try:
				268	cte(self)
				269	except TypeError:
				270	self._payload = charset.body_encode(self._payload)
				271	self.add_header('Content-Transfer-Encoding', cte)
				272
				273	def get_charset(self):
				274	"""Return the Charset instance associated with the message's payload.
				275	"""
				276	return self._charset
				277
				278	#
				279	# MAPPING INTERFACE (partial)
				280	#
				281	def __len__(self):
				282	"""Return the total number of headers, including duplicates."""
				283	return len(self._headers)
				284
				285	def __getitem__(self, name):
				286	"""Get a header value.
				287
				288	Return None if the header is missing instead of raising an exception.
				289
				290	Note that if the header appeared multiple times, exactly which
				291	occurrance gets returned is undefined. Use get_all() to get all
				292	the values matching a header field name.
				293	"""
				294	return self.get(name)
				295
				296	def __setitem__(self, name, val):
				297	"""Set the value of a header.
				298
				299	Note: this does not overwrite an existing header with the same field
				300	name. Use __delitem__() first to delete any existing headers.
				301	"""
				302	self._headers.append((name, val))
				303
				304	def __delitem__(self, name):
				305	"""Delete all occurrences of a header, if present.
				306
				307	Does not raise an exception if the header is missing.
				308	"""
				309	name = name.lower()
				310	newheaders = []
				311	for k, v in self._headers:
				312	if k.lower() != name:
				313	newheaders.append((k, v))
				314	self._headers = newheaders
				315
				316	def __contains__(self, name):
				317	return name.lower() in [k.lower() for k, v in self._headers]
				318
				319	def __iter__(self):
				320	for field, value in self._headers:
				321	yield field
				322
				323	def __len__(self):
				324	return len(self._headers)
				325
				326	def keys(self):
				327	"""Return a list of all the message's header field names.
				328
				329	These will be sorted in the order they appeared in the original
				330	message, or were added to the message, and may contain duplicates.
				331	Any fields deleted and re-inserted are always appended to the header
				332	list.
				333	"""
				334	return [k for k, v in self._headers]
				335
				336	def values(self):
				337	"""Return a list of all the message's header values.
				338
				339	These will be sorted in the order they appeared in the original
				340	message, or were added to the message, and may contain duplicates.
				341	Any fields deleted and re-inserted are always appended to the header
				342	list.
				343	"""
				344	return [v for k, v in self._headers]
				345
				346	def items(self):
				347	"""Get all the message's header fields and values.
				348
				349	These will be sorted in the order they appeared in the original
				350	message, or were added to the message, and may contain duplicates.
				351	Any fields deleted and re-inserted are always appended to the header
				352	list.
				353	"""
				354	return self._headers[:]
				355
				356	def get(self, name, failobj=None):
				357	"""Get a header value.
				358
				359	Like __getitem__() but return failobj instead of None when the field
				360	is missing.
				361	"""
				362	name = name.lower()
				363	for k, v in self._headers:
				364	if k.lower() == name:
				365	return v
				366	return failobj
				367
				368	#
				369	# Additional useful stuff
				370	#
				371
				372	def get_all(self, name, failobj=None):
				373	"""Return a list of all the values for the named field.
				374
				375	These will be sorted in the order they appeared in the original
				376	message, and may contain duplicates. Any fields deleted and
				377	re-inserted are always appended to the header list.
				378
				379	If no such fields exist, failobj is returned (defaults to None).
				380	"""
				381	values = []
				382	name = name.lower()
				383	for k, v in self._headers:
				384	if k.lower() == name:
				385	values.append(v)
				386	if not values:
				387	return failobj
				388	return values
				389
				390	def add_header(self, _name, _value, **_params):
				391	"""Extended header setting.
				392
				393	name is the header field to add. keyword arguments can be used to set
				394	additional parameters for the header field, with underscores converted
				395	to dashes. Normally the parameter will be added as key="value" unless
				396	value is None, in which case only the key will be added.
				397
				398	Example:
				399
				400	msg.add_header('content-disposition', 'attachment', filename='bud.gif')
				401	"""
				402	parts = []
				403	for k, v in _params.items():
				404	if v is None:
				405	parts.append(k.replace('_', '-'))
				406	else:
				407	parts.append(_formatparam(k.replace('_', '-'), v))
				408	if _value is not None:
				409	parts.insert(0, _value)
				410	self._headers.append((_name, SEMISPACE.join(parts)))
				411
				412	def replace_header(self, _name, _value):
				413	"""Replace a header.
				414
				415	Replace the first matching header found in the message, retaining
				416	header order and case. If no matching header was found, a KeyError is
				417	raised.
				418	"""
				419	_name = _name.lower()
				420	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				421	if k.lower() == _name:
				422	self._headers[i] = (k, _value)
				423	break
				424	else:
				425	raise KeyError(_name)
				426
				427	#
				428	# Use these three methods instead of the three above.
				429	#
				430
				431	def get_content_type(self):
				432	"""Return the message's content type.
				433
				434	The returned string is coerced to lower case of the form
				435	`maintype/subtype'. If there was no Content-Type header in the
				436	message, the default type as given by get_default_type() will be
				437	returned. Since according to RFC 2045, messages always have a default
				438	type this will always return a value.
				439
				440	RFC 2045 defines a message's default type to be text/plain unless it
				441	appears inside a multipart/digest container, in which case it would be
				442	message/rfc822.
				443	"""
				444	missing = object()
				445	value = self.get('content-type', missing)
				446	if value is missing:
				447	# This should have no parameters
				448	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	449	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	450	# RFC 2045, section 5.2 says if its invalid, use text/plain
				451	if ctype.count('/') != 1:
				452	return 'text/plain'
				453	return ctype
				454
				455	def get_content_maintype(self):
				456	"""Return the message's main content type.
				457
				458	This is the `maintype' part of the string returned by
				459	get_content_type().
				460	"""
				461	ctype = self.get_content_type()
				462	return ctype.split('/')[0]
				463
				464	def get_content_subtype(self):
				465	"""Returns the message's sub-content type.
				466
				467	This is the `subtype' part of the string returned by
				468	get_content_type().
				469	"""
				470	ctype = self.get_content_type()
				471	return ctype.split('/')[1]
				472
				473	def get_default_type(self):
				474	"""Return the `default' content type.
				475
				476	Most messages have a default content type of text/plain, except for
				477	messages that are subparts of multipart/digest containers. Such
				478	subparts have a default content type of message/rfc822.
				479	"""
				480	return self._default_type
				481
				482	def set_default_type(self, ctype):
				483	"""Set the `default' content type.
				484
				485	ctype should be either "text/plain" or "message/rfc822", although this
				486	is not enforced. The default content type is not stored in the
				487	Content-Type header.
				488	"""
				489	self._default_type = ctype
				490
				491	def _get_params_preserve(self, failobj, header):
				492	# Like get_params() but preserves the quoting of values. BAW:
				493	# should this be part of the public interface?
				494	missing = object()
				495	value = self.get(header, missing)
				496	if value is missing:
				497	return failobj
				498	params = []
				499	for p in _parseparam(';' + value):
				500	try:
				501	name, val = p.split('=', 1)
				502	name = name.strip()
				503	val = val.strip()
				504	except ValueError:
				505	# Must have been a bare attribute
				506	name = p.strip()
				507	val = ''
				508	params.append((name, val))
				509	params = utils.decode_params(params)
				510	return params
				511
				512	def get_params(self, failobj=None, header='content-type', unquote=True):
				513	"""Return the message's Content-Type parameters, as a list.
				514
				515	The elements of the returned list are 2-tuples of key/value pairs, as
				516	split on the `=' sign. The left hand side of the `=' is the key,
				517	while the right hand side is the value. If there is no `=' sign in
				518	the parameter the value is the empty string. The value is as
				519	described in the get_param() method.
				520
				521	Optional failobj is the object to return if there is no Content-Type
				522	header. Optional header is the header to search instead of
				523	Content-Type. If unquote is True, the value is unquoted.
				524	"""
				525	missing = object()
				526	params = self._get_params_preserve(missing, header)
				527	if params is missing:
				528	return failobj
				529	if unquote:
				530	return [(k, _unquotevalue(v)) for k, v in params]
				531	else:
				532	return params
				533
				534	def get_param(self, param, failobj=None, header='content-type',
				535	unquote=True):
				536	"""Return the parameter value if found in the Content-Type header.
				537
				538	Optional failobj is the object to return if there is no Content-Type
				539	header, or the Content-Type header has no such parameter. Optional
				540	header is the header to search instead of Content-Type.
				541
				542	Parameter keys are always compared case insensitively. The return
				543	value can either be a string, or a 3-tuple if the parameter was RFC
				544	2231 encoded. When it's a 3-tuple, the elements of the value are of
				545	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				546	LANGUAGE can be None, in which case you should consider VALUE to be
				547	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				548
				549	Your application should be prepared to deal with 3-tuple return
				550	values, and can convert the parameter to a Unicode string like so:
				551
				552	param = msg.get_param('foo')
				553	if isinstance(param, tuple):
				554	param = unicode(param[2], param[0] or 'us-ascii')
				555
				556	In any case, the parameter value (either the returned string, or the
				557	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				558	to False.
				559	"""
				560	if header not in self:
				561	return failobj
				562	for k, v in self._get_params_preserve(failobj, header):
				563	if k.lower() == param.lower():
				564	if unquote:
				565	return _unquotevalue(v)
				566	else:
				567	return v
				568	return failobj
				569
				570	def set_param(self, param, value, header='Content-Type', requote=True,
				571	charset=None, language=''):
				572	"""Set a parameter in the Content-Type header.
				573
				574	If the parameter already exists in the header, its value will be
				575	replaced with the new value.
				576
				577	If header is Content-Type and has not yet been defined for this
				578	message, it will be set to "text/plain" and the new parameter and
				579	value will be appended as per RFC 2045.
				580
				581	An alternate header can specified in the header argument, and all
				582	parameters will be quoted as necessary unless requote is False.
				583
				584	If charset is specified, the parameter will be encoded according to RFC
				585	2231. Optional language specifies the RFC 2231 language, defaulting
				586	to the empty string. Both charset and language should be strings.
				587	"""
				588	if not isinstance(value, tuple) and charset:
				589	value = (charset, language, value)
				590
				591	if header not in self and header.lower() == 'content-type':
				592	ctype = 'text/plain'
				593	else:
				594	ctype = self.get(header)
				595	if not self.get_param(param, header=header):
				596	if not ctype:
				597	ctype = _formatparam(param, value, requote)
				598	else:
				599	ctype = SEMISPACE.join(
				600	[ctype, _formatparam(param, value, requote)])
				601	else:
				602	ctype = ''
				603	for old_param, old_value in self.get_params(header=header,
				604	unquote=requote):
				605	append_param = ''
				606	if old_param.lower() == param.lower():
				607	append_param = _formatparam(param, value, requote)
				608	else:
				609	append_param = _formatparam(old_param, old_value, requote)
				610	if not ctype:
				611	ctype = append_param
				612	else:
				613	ctype = SEMISPACE.join([ctype, append_param])
				614	if ctype != self.get(header):
				615	del self[header]
				616	self[header] = ctype
				617
				618	def del_param(self, param, header='content-type', requote=True):
				619	"""Remove the given parameter completely from the Content-Type header.
				620
				621	The header will be re-written in place without the parameter or its
				622	value. All values will be quoted as necessary unless requote is
				623	False. Optional header specifies an alternative to the Content-Type
				624	header.
				625	"""
				626	if header not in self:
				627	return
				628	new_ctype = ''
				629	for p, v in self.get_params(header=header, unquote=requote):
				630	if p.lower() != param.lower():
				631	if not new_ctype:
				632	new_ctype = _formatparam(p, v, requote)
				633	else:
				634	new_ctype = SEMISPACE.join([new_ctype,
				635	_formatparam(p, v, requote)])
				636	if new_ctype != self.get(header):
				637	del self[header]
				638	self[header] = new_ctype
				639
				640	def set_type(self, type, header='Content-Type', requote=True):
				641	"""Set the main type and subtype for the Content-Type header.
				642
				643	type must be a string in the form "maintype/subtype", otherwise a
				644	ValueError is raised.
				645
				646	This method replaces the Content-Type header, keeping all the
				647	parameters in place. If requote is False, this leaves the existing
				648	header's quoting as is. Otherwise, the parameters will be quoted (the
				649	default).
				650
				651	An alternative header can be specified in the header argument. When
				652	the Content-Type header is set, we'll always also add a MIME-Version
				653	header.
				654	"""
				655	# BAW: should we be strict?
				656	if not type.count('/') == 1:
				657	raise ValueError
				658	# Set the Content-Type, you get a MIME-Version
				659	if header.lower() == 'content-type':
				660	del self['mime-version']
				661	self['MIME-Version'] = '1.0'
				662	if header not in self:
				663	self[header] = type
				664	return
				665	params = self.get_params(header=header, unquote=requote)
				666	del self[header]
				667	self[header] = type
				668	# Skip the first param; it's the old type.
				669	for p, v in params[1:]:
				670	self.set_param(p, v, header, requote)
				671
				672	def get_filename(self, failobj=None):
				673	"""Return the filename associated with the payload if present.
				674
				675	The filename is extracted from the Content-Disposition header's
				676	`filename' parameter, and it is unquoted. If that header is missing
				677	the `filename' parameter, this method falls back to looking for the
				678	`name' parameter.
				679	"""
				680	missing = object()
				681	filename = self.get_param('filename', missing, 'content-disposition')
				682	if filename is missing:
R. David Murray	bf2e0aa	2009-10-10 00:13:32 +0000	[diff] [blame]	683	filename = self.get_param('name', missing, 'content-type')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	684	if filename is missing:
				685	return failobj
				686	return utils.collapse_rfc2231_value(filename).strip()
				687
				688	def get_boundary(self, failobj=None):
				689	"""Return the boundary associated with the payload if present.
				690
				691	The boundary is extracted from the Content-Type header's `boundary'
				692	parameter, and it is unquoted.
				693	"""
				694	missing = object()
				695	boundary = self.get_param('boundary', missing)
				696	if boundary is missing:
				697	return failobj
				698	# RFC 2046 says that boundaries may begin but not end in w/s
				699	return utils.collapse_rfc2231_value(boundary).rstrip()
				700
				701	def set_boundary(self, boundary):
				702	"""Set the boundary parameter in Content-Type to 'boundary'.
				703
				704	This is subtly different than deleting the Content-Type header and
				705	adding a new one with a new boundary parameter via add_header(). The
				706	main difference is that using the set_boundary() method preserves the
				707	order of the Content-Type header in the original message.
				708
				709	HeaderParseError is raised if the message has no Content-Type header.
				710	"""
				711	missing = object()
				712	params = self._get_params_preserve(missing, 'content-type')
				713	if params is missing:
				714	# There was no Content-Type header, and we don't know what type
				715	# to set it to, so raise an exception.
				716	raise errors.HeaderParseError('No Content-Type header found')
				717	newparams = []
				718	foundp = False
				719	for pk, pv in params:
				720	if pk.lower() == 'boundary':
				721	newparams.append(('boundary', '"%s"' % boundary))
				722	foundp = True
				723	else:
				724	newparams.append((pk, pv))
				725	if not foundp:
				726	# The original Content-Type header had no boundary attribute.
				727	# Tack one on the end. BAW: should we raise an exception
				728	# instead???
				729	newparams.append(('boundary', '"%s"' % boundary))
				730	# Replace the existing Content-Type header with the new value
				731	newheaders = []
				732	for h, v in self._headers:
				733	if h.lower() == 'content-type':
				734	parts = []
				735	for k, v in newparams:
				736	if v == '':
				737	parts.append(k)
				738	else:
				739	parts.append('%s=%s' % (k, v))
				740	newheaders.append((h, SEMISPACE.join(parts)))
				741
				742	else:
				743	newheaders.append((h, v))
				744	self._headers = newheaders
				745
				746	def get_content_charset(self, failobj=None):
				747	"""Return the charset parameter of the Content-Type header.
				748
				749	The returned string is always coerced to lower case. If there is no
				750	Content-Type header, or if that header has no charset parameter,
				751	failobj is returned.
				752	"""
				753	missing = object()
				754	charset = self.get_param('charset', missing)
				755	if charset is missing:
				756	return failobj
				757	if isinstance(charset, tuple):
				758	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				759	pcharset = charset[0] or 'us-ascii'
				760	try:
				761	# LookupError will be raised if the charset isn't known to
				762	# Python. UnicodeError will be raised if the encoded text
				763	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	764	as_bytes = charset[2].encode('raw-unicode-escape')
				765	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	766	except (LookupError, UnicodeError):
				767	charset = charset[2]
				768	# charset characters must be in us-ascii range
				769	try:
				770	charset.encode('us-ascii')
				771	except UnicodeError:
				772	return failobj
				773	# RFC 2046, $4.1.2 says charsets are not case sensitive
				774	return charset.lower()
				775
				776	def get_charsets(self, failobj=None):
				777	"""Return a list containing the charset(s) used in this message.
				778
				779	The returned list of items describes the Content-Type headers'
				780	charset parameter for this message and all the subparts in its
				781	payload.
				782
				783	Each item will either be a string (the value of the charset parameter
				784	in the Content-Type header of that part) or the value of the
				785	'failobj' parameter (defaults to None), if the part does not have a
				786	main MIME type of "text", or the charset is not defined.
				787
				788	The list will contain one string for each part of the message, plus
				789	one for the container message (i.e. self), so that a non-multipart
				790	message will still return a list of length 1.
				791	"""
				792	return [part.get_content_charset(failobj) for part in self.walk()]
				793
				794	# I.e. def walk(self): ...
				795	from email.iterators import walk