Blame - Lib/email/message.py - platform/external/python/cpython3

blob: ff262c7c8f5f925f1b4488a13d2c89f6fdc8fe43 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	19	from email.charset import Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	20
				21	SEMISPACE = '; '
				22
				23	# Regular expression used to split header parameters. BAW: this may be too
				24	# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
				25	# most headers found in the wild. We may eventually need a full fledged
				26	# parser eventually.
				27	paramre = re.compile(r'\s;\s')
				28	# Regular expression that matches `special' characters in parameters, the
				29	# existance of which force quoting of the parameter value.
				30	tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				31
				32
				33
				34	# Helper functions
				35	def _formatparam(param, value=None, quote=True):
				36	"""Convenience function to format and return a key=value pair.
				37
				38	This will quote the value if needed or if quote is true.
				39	"""
				40	if value is not None and len(value) > 0:
				41	# A tuple is used for RFC 2231 encoded parameter values where items
				42	# are (charset, language, value). charset is a string, not a Charset
				43	# instance.
				44	if isinstance(value, tuple):
				45	# Encode as per RFC 2231
				46	param += '*'
				47	value = utils.encode_rfc2231(value[2], value[0], value[1])
				48	# BAW: Please check this. I think that if quote is set it should
				49	# force quoting even if not necessary.
				50	if quote or tspecials.search(value):
				51	return '%s="%s"' % (param, utils.quote(value))
				52	else:
				53	return '%s=%s' % (param, value)
				54	else:
				55	return param
				56
				57	def _parseparam(s):
				58	plist = []
				59	while s[:1] == ';':
				60	s = s[1:]
				61	end = s.find(';')
				62	while end > 0 and s.count('"', 0, end) % 2:
				63	end = s.find(';', end + 1)
				64	if end < 0:
				65	end = len(s)
				66	f = s[:end]
				67	if '=' in f:
				68	i = f.index('=')
				69	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				70	plist.append(f.strip())
				71	s = s[end:]
				72	return plist
				73
				74
				75	def _unquotevalue(value):
				76	# This is different than utils.collapse_rfc2231_value() because it doesn't
				77	# try to convert the value to a unicode. Message.get_param() and
				78	# Message.get_params() are both currently defined to return the tuple in
				79	# the face of RFC 2231 parameters.
				80	if isinstance(value, tuple):
				81	return value[0], value[1], utils.unquote(value[2])
				82	else:
				83	return utils.unquote(value)
				84
				85
				86
				87	class Message:
				88	"""Basic message object.
				89
				90	A message object is defined as something that has a bunch of RFC 2822
				91	headers and a payload. It may optionally have an envelope header
				92	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				93	multipart or a message/rfc822), then the payload is a list of Message
				94	objects, otherwise it is a string.
				95
				96	Message objects implement part of the `mapping' interface, which assumes
				97	there is exactly one occurrance of the header per message. Some headers
				98	do in fact appear multiple times (e.g. Received) and for those headers,
				99	you must use the explicit API to set or get all the headers. Not all of
				100	the mapping methods are implemented.
				101	"""
				102	def __init__(self):
				103	self._headers = []
				104	self._unixfrom = None
				105	self._payload = None
				106	self._charset = None
				107	# Defaults for multipart messages
				108	self.preamble = self.epilogue = None
				109	self.defects = []
				110	# Default content type
				111	self._default_type = 'text/plain'
				112
				113	def __str__(self):
				114	"""Return the entire formatted message as a string.
				115	This includes the headers, body, and envelope header.
				116	"""
				117	return self.as_string()
				118
				119	def as_string(self, unixfrom=False, maxheaderlen=0):
				120	"""Return the entire formatted message as a string.
				121	Optional `unixfrom' when True, means include the Unix From_ envelope
				122	header.
				123
				124	This is a convenience method and may not generate the message exactly
				125	as you intend because by default it mangles lines that begin with
				126	"From ". For more flexibility, use the flatten() method of a
				127	Generator instance.
				128	"""
				129	from email.generator import Generator
				130	fp = StringIO()
				131	g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
				132	g.flatten(self, unixfrom=unixfrom)
				133	return fp.getvalue()
				134
				135	def is_multipart(self):
				136	"""Return True if the message consists of multiple parts."""
				137	return isinstance(self._payload, list)
				138
				139	#
				140	# Unix From_ line
				141	#
				142	def set_unixfrom(self, unixfrom):
				143	self._unixfrom = unixfrom
				144
				145	def get_unixfrom(self):
				146	return self._unixfrom
				147
				148	#
				149	# Payload manipulation.
				150	#
				151	def attach(self, payload):
				152	"""Add the given payload to the current payload.
				153
				154	The current payload will always be a list of objects after this method
				155	is called. If you want to set the payload to a scalar object, use
				156	set_payload() instead.
				157	"""
				158	if self._payload is None:
				159	self._payload = [payload]
				160	else:
				161	self._payload.append(payload)
				162
				163	def get_payload(self, i=None, decode=False):
				164	"""Return a reference to the payload.
				165
				166	The payload will either be a list object or a string. If you mutate
				167	the list object, you modify the message's payload in place. Optional
				168	i returns that index into the payload.
				169
				170	Optional decode is a flag indicating whether the payload should be
				171	decoded or not, according to the Content-Transfer-Encoding header
				172	(default is False).
				173
				174	When True and the message is not a multipart, the payload will be
				175	decoded if this header's value is `quoted-printable' or `base64'. If
				176	some other encoding is used, or the header is missing, or if the
				177	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				178	payload is returned as-is.
				179
				180	If the message is a multipart and the decode flag is True, then None
				181	is returned.
				182	"""
				183	if i is None:
				184	payload = self._payload
				185	elif not isinstance(self._payload, list):
				186	raise TypeError('Expected list, got %s' % type(self._payload))
				187	else:
				188	payload = self._payload[i]
				189	if not decode:
				190	return payload
				191	# Decoded payloads always return bytes. XXX split this part out into
				192	# a new method called .get_decoded_payload().
				193	if self.is_multipart():
				194	return None
				195	cte = self.get('content-transfer-encoding', '').lower()
				196	if cte == 'quoted-printable':
				197	return utils._qdecode(payload)
				198	elif cte == 'base64':
				199	try:
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	200	if isinstance(payload, str):
				201	payload = payload.encode('raw-unicode-escape')
				202	return base64.b64decode(payload)
				203	#return utils._bdecode(payload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	204	except binascii.Error:
				205	# Incorrect padding
				206	pass
				207	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	208	in_file = BytesIO(payload.encode('raw-unicode-escape'))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	209	out_file = BytesIO()
				210	try:
				211	uu.decode(in_file, out_file, quiet=True)
				212	return out_file.getvalue()
				213	except uu.Error:
				214	# Some decoding problem
				215	pass
				216	# Is there a better way to do this? We can't use the bytes
				217	# constructor.
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	218	if isinstance(payload, str):
				219	return payload.encode('raw-unicode-escape')
				220	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	221
				222	def set_payload(self, payload, charset=None):
				223	"""Set the payload to the given value.
				224
				225	Optional charset sets the message's default character set. See
				226	set_charset() for details.
				227	"""
				228	self._payload = payload
				229	if charset is not None:
				230	self.set_charset(charset)
				231
				232	def set_charset(self, charset):
				233	"""Set the charset of the payload to a given character set.
				234
				235	charset can be a Charset instance, a string naming a character set, or
				236	None. If it is a string it will be converted to a Charset instance.
				237	If charset is None, the charset parameter will be removed from the
				238	Content-Type field. Anything else will generate a TypeError.
				239
				240	The message will be assumed to be of type text/* encoded with
				241	charset.input_charset. It will be converted to charset.output_charset
				242	and encoded properly, if needed, when generating the plain text
				243	representation of the message. MIME headers (MIME-Version,
				244	Content-Type, Content-Transfer-Encoding) will be added as needed.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	245	"""
				246	if charset is None:
				247	self.del_param('charset')
				248	self._charset = None
				249	return
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	250	if not isinstance(charset, Charset):
				251	charset = Charset(charset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	252	self._charset = charset
				253	if 'MIME-Version' not in self:
				254	self.add_header('MIME-Version', '1.0')
				255	if 'Content-Type' not in self:
				256	self.add_header('Content-Type', 'text/plain',
				257	charset=charset.get_output_charset())
				258	else:
				259	self.set_param('charset', charset.get_output_charset())
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	260	if charset != charset.get_output_charset():
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	261	self._payload = charset.body_encode(self._payload)
				262	if 'Content-Transfer-Encoding' not in self:
				263	cte = charset.get_body_encoding()
				264	try:
				265	cte(self)
				266	except TypeError:
				267	self._payload = charset.body_encode(self._payload)
				268	self.add_header('Content-Transfer-Encoding', cte)
				269
				270	def get_charset(self):
				271	"""Return the Charset instance associated with the message's payload.
				272	"""
				273	return self._charset
				274
				275	#
				276	# MAPPING INTERFACE (partial)
				277	#
				278	def __len__(self):
				279	"""Return the total number of headers, including duplicates."""
				280	return len(self._headers)
				281
				282	def __getitem__(self, name):
				283	"""Get a header value.
				284
				285	Return None if the header is missing instead of raising an exception.
				286
				287	Note that if the header appeared multiple times, exactly which
				288	occurrance gets returned is undefined. Use get_all() to get all
				289	the values matching a header field name.
				290	"""
				291	return self.get(name)
				292
				293	def __setitem__(self, name, val):
				294	"""Set the value of a header.
				295
				296	Note: this does not overwrite an existing header with the same field
				297	name. Use __delitem__() first to delete any existing headers.
				298	"""
				299	self._headers.append((name, val))
				300
				301	def __delitem__(self, name):
				302	"""Delete all occurrences of a header, if present.
				303
				304	Does not raise an exception if the header is missing.
				305	"""
				306	name = name.lower()
				307	newheaders = []
				308	for k, v in self._headers:
				309	if k.lower() != name:
				310	newheaders.append((k, v))
				311	self._headers = newheaders
				312
				313	def __contains__(self, name):
				314	return name.lower() in [k.lower() for k, v in self._headers]
				315
				316	def __iter__(self):
				317	for field, value in self._headers:
				318	yield field
				319
				320	def __len__(self):
				321	return len(self._headers)
				322
				323	def keys(self):
				324	"""Return a list of all the message's header field names.
				325
				326	These will be sorted in the order they appeared in the original
				327	message, or were added to the message, and may contain duplicates.
				328	Any fields deleted and re-inserted are always appended to the header
				329	list.
				330	"""
				331	return [k for k, v in self._headers]
				332
				333	def values(self):
				334	"""Return a list of all the message's header values.
				335
				336	These will be sorted in the order they appeared in the original
				337	message, or were added to the message, and may contain duplicates.
				338	Any fields deleted and re-inserted are always appended to the header
				339	list.
				340	"""
				341	return [v for k, v in self._headers]
				342
				343	def items(self):
				344	"""Get all the message's header fields and values.
				345
				346	These will be sorted in the order they appeared in the original
				347	message, or were added to the message, and may contain duplicates.
				348	Any fields deleted and re-inserted are always appended to the header
				349	list.
				350	"""
				351	return self._headers[:]
				352
				353	def get(self, name, failobj=None):
				354	"""Get a header value.
				355
				356	Like __getitem__() but return failobj instead of None when the field
				357	is missing.
				358	"""
				359	name = name.lower()
				360	for k, v in self._headers:
				361	if k.lower() == name:
				362	return v
				363	return failobj
				364
				365	#
				366	# Additional useful stuff
				367	#
				368
				369	def get_all(self, name, failobj=None):
				370	"""Return a list of all the values for the named field.
				371
				372	These will be sorted in the order they appeared in the original
				373	message, and may contain duplicates. Any fields deleted and
				374	re-inserted are always appended to the header list.
				375
				376	If no such fields exist, failobj is returned (defaults to None).
				377	"""
				378	values = []
				379	name = name.lower()
				380	for k, v in self._headers:
				381	if k.lower() == name:
				382	values.append(v)
				383	if not values:
				384	return failobj
				385	return values
				386
				387	def add_header(self, _name, _value, **_params):
				388	"""Extended header setting.
				389
				390	name is the header field to add. keyword arguments can be used to set
				391	additional parameters for the header field, with underscores converted
				392	to dashes. Normally the parameter will be added as key="value" unless
				393	value is None, in which case only the key will be added.
				394
				395	Example:
				396
				397	msg.add_header('content-disposition', 'attachment', filename='bud.gif')
				398	"""
				399	parts = []
				400	for k, v in _params.items():
				401	if v is None:
				402	parts.append(k.replace('_', '-'))
				403	else:
				404	parts.append(_formatparam(k.replace('_', '-'), v))
				405	if _value is not None:
				406	parts.insert(0, _value)
				407	self._headers.append((_name, SEMISPACE.join(parts)))
				408
				409	def replace_header(self, _name, _value):
				410	"""Replace a header.
				411
				412	Replace the first matching header found in the message, retaining
				413	header order and case. If no matching header was found, a KeyError is
				414	raised.
				415	"""
				416	_name = _name.lower()
				417	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				418	if k.lower() == _name:
				419	self._headers[i] = (k, _value)
				420	break
				421	else:
				422	raise KeyError(_name)
				423
				424	#
				425	# Use these three methods instead of the three above.
				426	#
				427
				428	def get_content_type(self):
				429	"""Return the message's content type.
				430
				431	The returned string is coerced to lower case of the form
				432	`maintype/subtype'. If there was no Content-Type header in the
				433	message, the default type as given by get_default_type() will be
				434	returned. Since according to RFC 2045, messages always have a default
				435	type this will always return a value.
				436
				437	RFC 2045 defines a message's default type to be text/plain unless it
				438	appears inside a multipart/digest container, in which case it would be
				439	message/rfc822.
				440	"""
				441	missing = object()
				442	value = self.get('content-type', missing)
				443	if value is missing:
				444	# This should have no parameters
				445	return self.get_default_type()
				446	ctype = paramre.split(value)[0].lower().strip()
				447	# RFC 2045, section 5.2 says if its invalid, use text/plain
				448	if ctype.count('/') != 1:
				449	return 'text/plain'
				450	return ctype
				451
				452	def get_content_maintype(self):
				453	"""Return the message's main content type.
				454
				455	This is the `maintype' part of the string returned by
				456	get_content_type().
				457	"""
				458	ctype = self.get_content_type()
				459	return ctype.split('/')[0]
				460
				461	def get_content_subtype(self):
				462	"""Returns the message's sub-content type.
				463
				464	This is the `subtype' part of the string returned by
				465	get_content_type().
				466	"""
				467	ctype = self.get_content_type()
				468	return ctype.split('/')[1]
				469
				470	def get_default_type(self):
				471	"""Return the `default' content type.
				472
				473	Most messages have a default content type of text/plain, except for
				474	messages that are subparts of multipart/digest containers. Such
				475	subparts have a default content type of message/rfc822.
				476	"""
				477	return self._default_type
				478
				479	def set_default_type(self, ctype):
				480	"""Set the `default' content type.
				481
				482	ctype should be either "text/plain" or "message/rfc822", although this
				483	is not enforced. The default content type is not stored in the
				484	Content-Type header.
				485	"""
				486	self._default_type = ctype
				487
				488	def _get_params_preserve(self, failobj, header):
				489	# Like get_params() but preserves the quoting of values. BAW:
				490	# should this be part of the public interface?
				491	missing = object()
				492	value = self.get(header, missing)
				493	if value is missing:
				494	return failobj
				495	params = []
				496	for p in _parseparam(';' + value):
				497	try:
				498	name, val = p.split('=', 1)
				499	name = name.strip()
				500	val = val.strip()
				501	except ValueError:
				502	# Must have been a bare attribute
				503	name = p.strip()
				504	val = ''
				505	params.append((name, val))
				506	params = utils.decode_params(params)
				507	return params
				508
				509	def get_params(self, failobj=None, header='content-type', unquote=True):
				510	"""Return the message's Content-Type parameters, as a list.
				511
				512	The elements of the returned list are 2-tuples of key/value pairs, as
				513	split on the `=' sign. The left hand side of the `=' is the key,
				514	while the right hand side is the value. If there is no `=' sign in
				515	the parameter the value is the empty string. The value is as
				516	described in the get_param() method.
				517
				518	Optional failobj is the object to return if there is no Content-Type
				519	header. Optional header is the header to search instead of
				520	Content-Type. If unquote is True, the value is unquoted.
				521	"""
				522	missing = object()
				523	params = self._get_params_preserve(missing, header)
				524	if params is missing:
				525	return failobj
				526	if unquote:
				527	return [(k, _unquotevalue(v)) for k, v in params]
				528	else:
				529	return params
				530
				531	def get_param(self, param, failobj=None, header='content-type',
				532	unquote=True):
				533	"""Return the parameter value if found in the Content-Type header.
				534
				535	Optional failobj is the object to return if there is no Content-Type
				536	header, or the Content-Type header has no such parameter. Optional
				537	header is the header to search instead of Content-Type.
				538
				539	Parameter keys are always compared case insensitively. The return
				540	value can either be a string, or a 3-tuple if the parameter was RFC
				541	2231 encoded. When it's a 3-tuple, the elements of the value are of
				542	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				543	LANGUAGE can be None, in which case you should consider VALUE to be
				544	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				545
				546	Your application should be prepared to deal with 3-tuple return
				547	values, and can convert the parameter to a Unicode string like so:
				548
				549	param = msg.get_param('foo')
				550	if isinstance(param, tuple):
				551	param = unicode(param[2], param[0] or 'us-ascii')
				552
				553	In any case, the parameter value (either the returned string, or the
				554	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				555	to False.
				556	"""
				557	if header not in self:
				558	return failobj
				559	for k, v in self._get_params_preserve(failobj, header):
				560	if k.lower() == param.lower():
				561	if unquote:
				562	return _unquotevalue(v)
				563	else:
				564	return v
				565	return failobj
				566
				567	def set_param(self, param, value, header='Content-Type', requote=True,
				568	charset=None, language=''):
				569	"""Set a parameter in the Content-Type header.
				570
				571	If the parameter already exists in the header, its value will be
				572	replaced with the new value.
				573
				574	If header is Content-Type and has not yet been defined for this
				575	message, it will be set to "text/plain" and the new parameter and
				576	value will be appended as per RFC 2045.
				577
				578	An alternate header can specified in the header argument, and all
				579	parameters will be quoted as necessary unless requote is False.
				580
				581	If charset is specified, the parameter will be encoded according to RFC
				582	2231. Optional language specifies the RFC 2231 language, defaulting
				583	to the empty string. Both charset and language should be strings.
				584	"""
				585	if not isinstance(value, tuple) and charset:
				586	value = (charset, language, value)
				587
				588	if header not in self and header.lower() == 'content-type':
				589	ctype = 'text/plain'
				590	else:
				591	ctype = self.get(header)
				592	if not self.get_param(param, header=header):
				593	if not ctype:
				594	ctype = _formatparam(param, value, requote)
				595	else:
				596	ctype = SEMISPACE.join(
				597	[ctype, _formatparam(param, value, requote)])
				598	else:
				599	ctype = ''
				600	for old_param, old_value in self.get_params(header=header,
				601	unquote=requote):
				602	append_param = ''
				603	if old_param.lower() == param.lower():
				604	append_param = _formatparam(param, value, requote)
				605	else:
				606	append_param = _formatparam(old_param, old_value, requote)
				607	if not ctype:
				608	ctype = append_param
				609	else:
				610	ctype = SEMISPACE.join([ctype, append_param])
				611	if ctype != self.get(header):
				612	del self[header]
				613	self[header] = ctype
				614
				615	def del_param(self, param, header='content-type', requote=True):
				616	"""Remove the given parameter completely from the Content-Type header.
				617
				618	The header will be re-written in place without the parameter or its
				619	value. All values will be quoted as necessary unless requote is
				620	False. Optional header specifies an alternative to the Content-Type
				621	header.
				622	"""
				623	if header not in self:
				624	return
				625	new_ctype = ''
				626	for p, v in self.get_params(header=header, unquote=requote):
				627	if p.lower() != param.lower():
				628	if not new_ctype:
				629	new_ctype = _formatparam(p, v, requote)
				630	else:
				631	new_ctype = SEMISPACE.join([new_ctype,
				632	_formatparam(p, v, requote)])
				633	if new_ctype != self.get(header):
				634	del self[header]
				635	self[header] = new_ctype
				636
				637	def set_type(self, type, header='Content-Type', requote=True):
				638	"""Set the main type and subtype for the Content-Type header.
				639
				640	type must be a string in the form "maintype/subtype", otherwise a
				641	ValueError is raised.
				642
				643	This method replaces the Content-Type header, keeping all the
				644	parameters in place. If requote is False, this leaves the existing
				645	header's quoting as is. Otherwise, the parameters will be quoted (the
				646	default).
				647
				648	An alternative header can be specified in the header argument. When
				649	the Content-Type header is set, we'll always also add a MIME-Version
				650	header.
				651	"""
				652	# BAW: should we be strict?
				653	if not type.count('/') == 1:
				654	raise ValueError
				655	# Set the Content-Type, you get a MIME-Version
				656	if header.lower() == 'content-type':
				657	del self['mime-version']
				658	self['MIME-Version'] = '1.0'
				659	if header not in self:
				660	self[header] = type
				661	return
				662	params = self.get_params(header=header, unquote=requote)
				663	del self[header]
				664	self[header] = type
				665	# Skip the first param; it's the old type.
				666	for p, v in params[1:]:
				667	self.set_param(p, v, header, requote)
				668
				669	def get_filename(self, failobj=None):
				670	"""Return the filename associated with the payload if present.
				671
				672	The filename is extracted from the Content-Disposition header's
				673	`filename' parameter, and it is unquoted. If that header is missing
				674	the `filename' parameter, this method falls back to looking for the
				675	`name' parameter.
				676	"""
				677	missing = object()
				678	filename = self.get_param('filename', missing, 'content-disposition')
				679	if filename is missing:
				680	filename = self.get_param('name', missing, 'content-disposition')
				681	if filename is missing:
				682	return failobj
				683	return utils.collapse_rfc2231_value(filename).strip()
				684
				685	def get_boundary(self, failobj=None):
				686	"""Return the boundary associated with the payload if present.
				687
				688	The boundary is extracted from the Content-Type header's `boundary'
				689	parameter, and it is unquoted.
				690	"""
				691	missing = object()
				692	boundary = self.get_param('boundary', missing)
				693	if boundary is missing:
				694	return failobj
				695	# RFC 2046 says that boundaries may begin but not end in w/s
				696	return utils.collapse_rfc2231_value(boundary).rstrip()
				697
				698	def set_boundary(self, boundary):
				699	"""Set the boundary parameter in Content-Type to 'boundary'.
				700
				701	This is subtly different than deleting the Content-Type header and
				702	adding a new one with a new boundary parameter via add_header(). The
				703	main difference is that using the set_boundary() method preserves the
				704	order of the Content-Type header in the original message.
				705
				706	HeaderParseError is raised if the message has no Content-Type header.
				707	"""
				708	missing = object()
				709	params = self._get_params_preserve(missing, 'content-type')
				710	if params is missing:
				711	# There was no Content-Type header, and we don't know what type
				712	# to set it to, so raise an exception.
				713	raise errors.HeaderParseError('No Content-Type header found')
				714	newparams = []
				715	foundp = False
				716	for pk, pv in params:
				717	if pk.lower() == 'boundary':
				718	newparams.append(('boundary', '"%s"' % boundary))
				719	foundp = True
				720	else:
				721	newparams.append((pk, pv))
				722	if not foundp:
				723	# The original Content-Type header had no boundary attribute.
				724	# Tack one on the end. BAW: should we raise an exception
				725	# instead???
				726	newparams.append(('boundary', '"%s"' % boundary))
				727	# Replace the existing Content-Type header with the new value
				728	newheaders = []
				729	for h, v in self._headers:
				730	if h.lower() == 'content-type':
				731	parts = []
				732	for k, v in newparams:
				733	if v == '':
				734	parts.append(k)
				735	else:
				736	parts.append('%s=%s' % (k, v))
				737	newheaders.append((h, SEMISPACE.join(parts)))
				738
				739	else:
				740	newheaders.append((h, v))
				741	self._headers = newheaders
				742
				743	def get_content_charset(self, failobj=None):
				744	"""Return the charset parameter of the Content-Type header.
				745
				746	The returned string is always coerced to lower case. If there is no
				747	Content-Type header, or if that header has no charset parameter,
				748	failobj is returned.
				749	"""
				750	missing = object()
				751	charset = self.get_param('charset', missing)
				752	if charset is missing:
				753	return failobj
				754	if isinstance(charset, tuple):
				755	# RFC 2231 encoded, so decode it, and it better end up as ascii.
				756	pcharset = charset[0] or 'us-ascii'
				757	try:
				758	# LookupError will be raised if the charset isn't known to
				759	# Python. UnicodeError will be raised if the encoded text
				760	# contains a character not in the charset.
Barry Warsaw	2cc1f6d	2007-08-30 14:28:55 +0000	[diff] [blame]	761	as_bytes = charset[2].encode('raw-unicode-escape')
				762	charset = str(as_bytes, pcharset)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	763	except (LookupError, UnicodeError):
				764	charset = charset[2]
				765	# charset characters must be in us-ascii range
				766	try:
				767	charset.encode('us-ascii')
				768	except UnicodeError:
				769	return failobj
				770	# RFC 2046, $4.1.2 says charsets are not case sensitive
				771	return charset.lower()
				772
				773	def get_charsets(self, failobj=None):
				774	"""Return a list containing the charset(s) used in this message.
				775
				776	The returned list of items describes the Content-Type headers'
				777	charset parameter for this message and all the subparts in its
				778	payload.
				779
				780	Each item will either be a string (the value of the charset parameter
				781	in the Content-Type header of that part) or the value of the
				782	'failobj' parameter (defaults to None), if the part does not have a
				783	main MIME type of "text", or the charset is not defined.
				784
				785	The list will contain one string for each part of the message, plus
				786	one for the container message (i.e. self), so that a non-multipart
				787	message will still return a list of length 1.
				788	"""
				789	return [part.get_content_charset(failobj) for part in self.walk()]
				790
				791	# I.e. def walk(self): ...
				792	from email.iterators import walk