Blame - Lib/email/message.py - platform/external/python/cpython2

blob: 28835d09d95276678e77384ec140ad7871ff0dc9 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message']
				8
				9	import re
				10	import uu
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	11	import base64
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	12	import binascii
				13	import warnings
				14	from io import BytesIO, StringIO
				15
				16	# Intrapackage imports
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	17	from email import utils
				18	from email import errors
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	19	from email import header
				20	from email import charset as _charset
				21	Charset = _charset.Charset
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	22
				23	SEMISPACE = '; '
				24
# Regular expression that matches the RFC 2045 `tspecials' characters (plus
# the space character), the existence of which force quoting of the
# parameter value.  RFC 2045 section 5.1 lists "(" and ")" among the
# tspecials, so they are part of the class as well.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
				28
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	29	# How to figure out if we are processing strings that come from a byte
				30	# source with undecodable characters.
				31	_has_surrogates = re.compile(
				32	'([^\ud800-\udbff]\|\A)[\udc00-\udfff]([^\udc00-\udfff]\|\Z)').search
				33
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	34
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	35	# Helper functions
def _sanitize_header(name, value):
    """Return value, wrapped in a Header when it contains surrogates.

    A str value for which _has_surrogates() matches is turned into a
    header.Header that uses the unknown-8bit charset (with name as its
    header_name).  Every other value is returned unchanged; a non-str value
    is assumed to already be a header object.
    """
    if isinstance(value, str) and _has_surrogates(value):
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)
    return value
				47
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	48	def _splitparam(param):
				49	# Split header parameters. BAW: this may be too simple. It isn't
				50	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				51	# found in the wild. We may eventually need a full fledged parser
				52	# eventually.
				53	a, sep, b = param.partition(';')
				54	if not sep:
				55	return a.strip(), None
				56	return a.strip(), b.strip()
				57
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	58	def _formatparam(param, value=None, quote=True):
				59	"""Convenience function to format and return a key=value pair.
				60
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	61	This will quote the value if needed or if quote is true. If value is a
				62	three tuple (charset, language, value), it will be encoded according
				63	to RFC2231 rules. If it contains non-ascii characters it will likewise
				64	be encoded according to RFC2231 rules, using the utf-8 charset and
				65	a null language.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	66	"""
				67	if value is not None and len(value) > 0:
				68	# A tuple is used for RFC 2231 encoded parameter values where items
				69	# are (charset, language, value). charset is a string, not a Charset
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	70	# instance. RFC 2231 encoded values are never quoted, per RFC.
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	71	if isinstance(value, tuple):
				72	# Encode as per RFC 2231
				73	param += '*'
				74	value = utils.encode_rfc2231(value[2], value[0], value[1])
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	75	return '%s=%s' % (param, value)
R. David Murray	7ec754b	2010-12-13 23:51:19 +0000	[diff] [blame]	76	else:
				77	try:
				78	value.encode('ascii')
				79	except UnicodeEncodeError:
				80	param += '*'
				81	value = utils.encode_rfc2231(value, 'utf-8', '')
R. David Murray	dfd7eb0	2010-12-24 22:36:49 +0000	[diff] [blame]	82	return '%s=%s' % (param, value)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	83	# BAW: Please check this. I think that if quote is set it should
				84	# force quoting even if not necessary.
				85	if quote or tspecials.search(value):
				86	return '%s="%s"' % (param, utils.quote(value))
				87	else:
				88	return '%s=%s' % (param, value)
				89	else:
				90	return param
				91
				92	def _parseparam(s):
				93	plist = []
				94	while s[:1] == ';':
				95	s = s[1:]
				96	end = s.find(';')
R. David Murray	d48739f	2010-04-14 18:59:18 +0000	[diff] [blame]	97	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	98	end = s.find(';', end + 1)
				99	if end < 0:
				100	end = len(s)
				101	f = s[:end]
				102	if '=' in f:
				103	i = f.index('=')
				104	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				105	plist.append(f.strip())
				106	s = s[end:]
				107	return plist
				108
				109
				110	def _unquotevalue(value):
				111	# This is different than utils.collapse_rfc2231_value() because it doesn't
				112	# try to convert the value to a unicode. Message.get_param() and
				113	# Message.get_params() are both currently defined to return the tuple in
				114	# the face of RFC 2231 parameters.
				115	if isinstance(value, tuple):
				116	return value[0], value[1], utils.unquote(value[2])
				117	else:
				118	return utils.unquote(value)
				119
				120
				121
				122	class Message:
				123	"""Basic message object.
				124
				125	A message object is defined as something that has a bunch of RFC 2822
				126	headers and a payload. It may optionally have an envelope header
				127	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				128	multipart or a message/rfc822), then the payload is a list of Message
				129	objects, otherwise it is a string.
				130
				131	Message objects implement part of the `mapping' interface, which assumes
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	132	there is exactly one occurrence of the header per message. Some headers
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	133	do in fact appear multiple times (e.g. Received) and for those headers,
				134	you must use the explicit API to set or get all the headers. Not all of
				135	the mapping methods are implemented.
				136	"""
				137	def __init__(self):
				138	self._headers = []
				139	self._unixfrom = None
				140	self._payload = None
				141	self._charset = None
				142	# Defaults for multipart messages
				143	self.preamble = self.epilogue = None
				144	self.defects = []
				145	# Default content type
				146	self._default_type = 'text/plain'
				147
    def __str__(self):
        """Return the result of as_string() called with its defaults."""
        rendered = self.as_string()
        return rendered
				153
    def as_string(self, unixfrom=False, maxheaderlen=0):
        """Return the entire formatted message as a string.

        Optional unixfrom, when True, means include the Unix From_ envelope
        header.  maxheaderlen is handed to the Generator unchanged.

        This is a convenience method: the message is flattened by a
        Generator created with mangle_from_=False, so body lines that begin
        with "From " are left as they are.  For more flexibility, use the
        flatten() method of a Generator instance.
        """
        from email.generator import Generator
        buffer = StringIO()
        generator = Generator(buffer, mangle_from_=False,
                              maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()
				169
				170	def is_multipart(self):
				171	"""Return True if the message consists of multiple parts."""
				172	return isinstance(self._payload, list)
				173
				174	#
				175	# Unix From_ line
				176	#
				177	def set_unixfrom(self, unixfrom):
				178	self._unixfrom = unixfrom
				179
				180	def get_unixfrom(self):
				181	return self._unixfrom
				182
				183	#
				184	# Payload manipulation.
				185	#
				186	def attach(self, payload):
				187	"""Add the given payload to the current payload.
				188
				189	The current payload will always be a list of objects after this method
				190	is called. If you want to set the payload to a scalar object, use
				191	set_payload() instead.
				192	"""
				193	if self._payload is None:
				194	self._payload = [payload]
				195	else:
				196	self._payload.append(payload)
				197
				198	def get_payload(self, i=None, decode=False):
				199	"""Return a reference to the payload.
				200
				201	The payload will either be a list object or a string. If you mutate
				202	the list object, you modify the message's payload in place. Optional
				203	i returns that index into the payload.
				204
				205	Optional decode is a flag indicating whether the payload should be
				206	decoded or not, according to the Content-Transfer-Encoding header
				207	(default is False).
				208
				209	When True and the message is not a multipart, the payload will be
				210	decoded if this header's value is `quoted-printable' or `base64'. If
				211	some other encoding is used, or the header is missing, or if the
				212	payload has bogus data (i.e. bogus base64 or uuencoded data), the
				213	payload is returned as-is.
				214
				215	If the message is a multipart and the decode flag is True, then None
				216	is returned.
				217	"""
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	218	# Here is the logic table for this code, based on the email5.0.0 code:
				219	# i decode is_multipart result
				220	# ------ ------ ------------ ------------------------------
				221	# None True True None
				222	# i True True None
				223	# None False True _payload (a list)
				224	# i False True _payload element i (a Message)
				225	# i False False error (not a list)
				226	# i True False error (not a list)
				227	# None False False _payload
				228	# None True False _payload decoded (bytes)
				229	# Note that Barry planned to factor out the 'decode' case, but that
				230	# isn't so easy now that we handle the 8 bit data, which needs to be
				231	# converted in both the decode and non-decode path.
				232	if self.is_multipart():
				233	if decode:
				234	return None
				235	if i is None:
				236	return self._payload
				237	else:
				238	return self._payload[i]
				239	# For backward compatibility, Use isinstance and this error message
				240	# instead of the more logical is_multipart test.
				241	if i is not None and not isinstance(self._payload, list):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	242	raise TypeError('Expected list, got %s' % type(self._payload))
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	243	payload = self._payload
				244	cte = self.get('content-transfer-encoding', '').lower()
R David Murray	106f8e3	2011-03-15 12:48:41 -0400	[diff] [blame^]	245	# payload may be bytes here.
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	246	if isinstance(payload, str):
				247	if _has_surrogates(payload):
				248	bpayload = payload.encode('ascii', 'surrogateescape')
				249	if not decode:
				250	try:
				251	payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
				252	except LookupError:
				253	payload = bpayload.decode('ascii', 'replace')
				254	elif decode:
				255	try:
				256	bpayload = payload.encode('ascii')
				257	except UnicodeError:
				258	# This won't happen for RFC compliant messages (messages
				259	# containing only ASCII codepoints in the unicode input).
				260	# If it does happen, turn the string into bytes in a way
				261	# guaranteed not to fail.
				262	bpayload = payload.encode('raw-unicode-escape')
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	263	if not decode:
				264	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	265	if cte == 'quoted-printable':
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	266	return utils._qdecode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	267	elif cte == 'base64':
				268	try:
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	269	return base64.b64decode(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	270	except binascii.Error:
				271	# Incorrect padding
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	272	return bpayload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	273	elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	274	in_file = BytesIO(bpayload)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	275	out_file = BytesIO()
				276	try:
				277	uu.decode(in_file, out_file, quiet=True)
				278	return out_file.getvalue()
				279	except uu.Error:
				280	# Some decoding problem
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	281	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	282	if isinstance(payload, str):
R. David Murray	96fd54e	2010-10-08 15:55:28 +0000	[diff] [blame]	283	return bpayload
Barry Warsaw	8b2af27	2007-08-31 03:04:26 +0000	[diff] [blame]	284	return payload
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	285
				286	def set_payload(self, payload, charset=None):
				287	"""Set the payload to the given value.
				288
				289	Optional charset sets the message's default character set. See
				290	set_charset() for details.
				291	"""
				292	self._payload = payload
				293	if charset is not None:
				294	self.set_charset(charset)
				295
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If charset is None, the charset parameter will be removed from
        the Content-Type field and the stored charset is cleared.  Any value
        that is not a Charset instance is passed to the Charset constructor.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add the MIME headers that are still missing; an existing
        # Content-Type only has its charset parameter (re)set.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # Body-encode the payload when the Charset does not compare equal to
        # its own output charset.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # cte is first tried as a callable taking the message.  If calling
            # it raises TypeError (presumably because it is the encoding name
            # rather than a function -- confirm against Charset), the payload
            # is body-encoded here and cte is used as the header value.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)
				333
				334	def get_charset(self):
				335	"""Return the Charset instance associated with the message's payload.
				336	"""
				337	return self._charset
				338
				339	#
				340	# MAPPING INTERFACE (partial)
				341	#
				342	def __len__(self):
				343	"""Return the total number of headers, including duplicates."""
				344	return len(self._headers)
				345
				346	def __getitem__(self, name):
				347	"""Get a header value.
				348
				349	Return None if the header is missing instead of raising an exception.
				350
				351	Note that if the header appeared multiple times, exactly which
R. David Murray	d2c310f	2010-10-01 02:08:02 +0000	[diff] [blame]	352	occurrence gets returned is undefined. Use get_all() to get all
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	353	the values matching a header field name.
				354	"""
				355	return self.get(name)
				356
				357	def __setitem__(self, name, val):
				358	"""Set the value of a header.
				359
				360	Note: this does not overwrite an existing header with the same field
				361	name. Use __delitem__() first to delete any existing headers.
				362	"""
				363	self._headers.append((name, val))
				364
				365	def __delitem__(self, name):
				366	"""Delete all occurrences of a header, if present.
				367
				368	Does not raise an exception if the header is missing.
				369	"""
				370	name = name.lower()
				371	newheaders = []
				372	for k, v in self._headers:
				373	if k.lower() != name:
				374	newheaders.append((k, v))
				375	self._headers = newheaders
				376
				377	def __contains__(self, name):
				378	return name.lower() in [k.lower() for k, v in self._headers]
				379
				380	def __iter__(self):
				381	for field, value in self._headers:
				382	yield field
				383
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	384	def keys(self):
				385	"""Return a list of all the message's header field names.
				386
				387	These will be sorted in the order they appeared in the original
				388	message, or were added to the message, and may contain duplicates.
				389	Any fields deleted and re-inserted are always appended to the header
				390	list.
				391	"""
				392	return [k for k, v in self._headers]
				393
				394	def values(self):
				395	"""Return a list of all the message's header values.
				396
				397	These will be sorted in the order they appeared in the original
				398	message, or were added to the message, and may contain duplicates.
				399	Any fields deleted and re-inserted are always appended to the header
				400	list.
				401	"""
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	402	return [_sanitize_header(k, v) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	403
				404	def items(self):
				405	"""Get all the message's header fields and values.
				406
				407	These will be sorted in the order they appeared in the original
				408	message, or were added to the message, and may contain duplicates.
				409	Any fields deleted and re-inserted are always appended to the header
				410	list.
				411	"""
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	412	return [(k, _sanitize_header(k, v)) for k, v in self._headers]
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	413
				414	def get(self, name, failobj=None):
				415	"""Get a header value.
				416
				417	Like __getitem__() but return failobj instead of None when the field
				418	is missing.
				419	"""
				420	name = name.lower()
				421	for k, v in self._headers:
				422	if k.lower() == name:
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	423	return _sanitize_header(k, v)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	424	return failobj
				425
				426	#
				427	# Additional useful stuff
				428	#
				429
				430	def get_all(self, name, failobj=None):
				431	"""Return a list of all the values for the named field.
				432
				433	These will be sorted in the order they appeared in the original
				434	message, and may contain duplicates. Any fields deleted and
				435	re-inserted are always appended to the header list.
				436
				437	If no such fields exist, failobj is returned (defaults to None).
				438	"""
				439	values = []
				440	name = name.lower()
				441	for k, v in self._headers:
				442	if k.lower() == name:
R. David Murray	9253214	2011-01-07 23:25:30 +0000	[diff] [blame]	443	values.append(_sanitize_header(k, v))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	444	if not values:
				445	return failobj
				446	return values
				447
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add.  Keyword arguments can be used to
        set additional parameters for the header field, with underscores
        converted to dashes.  Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added.  If a parameter value contains non-ASCII characters it can be
        specified as a three-tuple of (charset, language, value), in which
        case it will be encoded according to RFC2231 rules.  Otherwise it
        will be encoded using the utf-8 charset and a language of ''.

        When _value is not None it becomes the first item of the header
        value; all items are joined with '; '.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        pieces = [] if _value is None else [_value]
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            pieces.append(dashed if val is None else _formatparam(dashed, val))
        self._headers.append((_name, SEMISPACE.join(pieces)))
				477
				478	def replace_header(self, _name, _value):
				479	"""Replace a header.
				480
				481	Replace the first matching header found in the message, retaining
				482	header order and case. If no matching header was found, a KeyError is
				483	raised.
				484	"""
				485	_name = _name.lower()
				486	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				487	if k.lower() == _name:
				488	self._headers[i] = (k, _value)
				489	break
				490	else:
				491	raise KeyError(_name)
				492
				493	#
				494	# Use these three methods instead of the three above.
				495	#
				496
				497	def get_content_type(self):
				498	"""Return the message's content type.
				499
				500	The returned string is coerced to lower case of the form
				501	`maintype/subtype'. If there was no Content-Type header in the
				502	message, the default type as given by get_default_type() will be
				503	returned. Since according to RFC 2045, messages always have a default
				504	type this will always return a value.
				505
				506	RFC 2045 defines a message's default type to be text/plain unless it
				507	appears inside a multipart/digest container, in which case it would be
				508	message/rfc822.
				509	"""
				510	missing = object()
				511	value = self.get('content-type', missing)
				512	if value is missing:
				513	# This should have no parameters
				514	return self.get_default_type()
Benjamin Peterson	4cd6a95	2008-08-17 20:23:46 +0000	[diff] [blame]	515	ctype = _splitparam(value)[0].lower()
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	516	# RFC 2045, section 5.2 says if its invalid, use text/plain
				517	if ctype.count('/') != 1:
				518	return 'text/plain'
				519	return ctype
				520
				521	def get_content_maintype(self):
				522	"""Return the message's main content type.
				523
				524	This is the `maintype' part of the string returned by
				525	get_content_type().
				526	"""
				527	ctype = self.get_content_type()
				528	return ctype.split('/')[0]
				529
				530	def get_content_subtype(self):
				531	"""Returns the message's sub-content type.
				532
				533	This is the `subtype' part of the string returned by
				534	get_content_type().
				535	"""
				536	ctype = self.get_content_type()
				537	return ctype.split('/')[1]
				538
				539	def get_default_type(self):
				540	"""Return the `default' content type.
				541
				542	Most messages have a default content type of text/plain, except for
				543	messages that are subparts of multipart/digest containers. Such
				544	subparts have a default content type of message/rfc822.
				545	"""
				546	return self._default_type
				547
				548	def set_default_type(self, ctype):
				549	"""Set the `default' content type.
				550
				551	ctype should be either "text/plain" or "message/rfc822", although this
				552	is not enforced. The default content type is not stored in the
				553	Content-Type header.
				554	"""
				555	self._default_type = ctype
				556
				557	def _get_params_preserve(self, failobj, header):
				558	# Like get_params() but preserves the quoting of values. BAW:
				559	# should this be part of the public interface?
				560	missing = object()
				561	value = self.get(header, missing)
				562	if value is missing:
				563	return failobj
				564	params = []
				565	for p in _parseparam(';' + value):
				566	try:
				567	name, val = p.split('=', 1)
				568	name = name.strip()
				569	val = val.strip()
				570	except ValueError:
				571	# Must have been a bare attribute
				572	name = p.strip()
				573	val = ''
				574	params.append((name, val))
				575	params = utils.decode_params(params)
				576	return params
				577
				578	def get_params(self, failobj=None, header='content-type', unquote=True):
				579	"""Return the message's Content-Type parameters, as a list.
				580
				581	The elements of the returned list are 2-tuples of key/value pairs, as
				582	split on the `=' sign. The left hand side of the `=' is the key,
				583	while the right hand side is the value. If there is no `=' sign in
				584	the parameter the value is the empty string. The value is as
				585	described in the get_param() method.
				586
				587	Optional failobj is the object to return if there is no Content-Type
				588	header. Optional header is the header to search instead of
				589	Content-Type. If unquote is True, the value is unquoted.
				590	"""
				591	missing = object()
				592	params = self._get_params_preserve(missing, header)
				593	if params is missing:
				594	return failobj
				595	if unquote:
				596	return [(k, _unquotevalue(v)) for k, v in params]
				597	else:
				598	return params
				599
				600	def get_param(self, param, failobj=None, header='content-type',
				601	unquote=True):
				602	"""Return the parameter value if found in the Content-Type header.
				603
				604	Optional failobj is the object to return if there is no Content-Type
				605	header, or the Content-Type header has no such parameter. Optional
				606	header is the header to search instead of Content-Type.
				607
				608	Parameter keys are always compared case insensitively. The return
				609	value can either be a string, or a 3-tuple if the parameter was RFC
				610	2231 encoded. When it's a 3-tuple, the elements of the value are of
				611	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				612	LANGUAGE can be None, in which case you should consider VALUE to be
				613	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				614
				615	Your application should be prepared to deal with 3-tuple return
				616	values, and can convert the parameter to a Unicode string like so:
				617
				618	param = msg.get_param('foo')
				619	if isinstance(param, tuple):
				620	param = unicode(param[2], param[0] or 'us-ascii')
				621
				622	In any case, the parameter value (either the returned string, or the
				623	VALUE item in the 3-tuple) is always unquoted, unless unquote is set
				624	to False.
				625	"""
				626	if header not in self:
				627	return failobj
				628	for k, v in self._get_params_preserve(failobj, header):
				629	if k.lower() == param.lower():
				630	if unquote:
				631	return _unquotevalue(v)
				632	else:
				633	return v
				634	return failobj
				635
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        # A plain value plus a charset becomes the RFC 2231 three-tuple that
        # _formatparam() understands.
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # NOTE: the test below is a truthiness test, so a parameter that is
        # present with an empty value takes the "append" path as well.
        if not self.get_param(param, header=header):
            # Parameter not set yet: append it to the current header value
            # (or use it alone when there is no value at all).
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Parameter already present: rebuild the whole header value,
            # substituting the new value for the matching parameter and
            # re-formatting every other parameter unchanged.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        # Only touch the header list when the value actually changed; the
        # header is deleted and re-added, so it moves to the end of the list.
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype
				683
				684	def del_param(self, param, header='content-type', requote=True):
				685	"""Remove the given parameter completely from the Content-Type header.
				686
				687	The header will be re-written in place without the parameter or its
				688	value. All values will be quoted as necessary unless requote is
				689	False. Optional header specifies an alternative to the Content-Type
				690	header.
				691	"""
				692	if header not in self:
				693	return
				694	new_ctype = ''
				695	for p, v in self.get_params(header=header, unquote=requote):
				696	if p.lower() != param.lower():
				697	if not new_ctype:
				698	new_ctype = _formatparam(p, v, requote)
				699	else:
				700	new_ctype = SEMISPACE.join([new_ctype,
				701	_formatparam(p, v, requote)])
				702	if new_ctype != self.get(header):
				703	del self[header]
				704	self[header] = new_ctype
				705
    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string containing exactly one '/' (that is, of the
        form "maintype/subtype"); otherwise a ValueError describing the
        offending value is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted
        (the default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, any existing MIME-Version header is
        replaced by "MIME-Version: 1.0".
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError(
                'type must be of the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            # Nothing to preserve: just store the bare type.
            self[header] = type
            return
        # Capture the existing parameters before the header is dropped.
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)
				737
    def get_filename(self, failobj=None):
        """Return the filename recorded for the payload, or failobj.

        The `filename' parameter of the Content-Disposition header is tried
        first.  If it is absent, the `name' parameter of the Content-Type
        header is used instead.  The value found is collapsed with
        utils.collapse_rfc2231_value() and stripped of leading and trailing
        whitespace.  failobj is returned when neither parameter is present.
        """
        absent = object()
        # Candidate (parameter, header) pairs, in order of preference.
        for param, header in (('filename', 'content-disposition'),
                              ('name', 'content-type')):
            raw = self.get_param(param, absent, header)
            if raw is not absent:
                return utils.collapse_rfc2231_value(raw).strip()
        return failobj
				753
    def get_boundary(self, failobj=None):
        """Return the payload's boundary string, or failobj.

        The value comes from the `boundary' parameter looked up through
        get_param() with its default header.  It is collapsed with
        utils.collapse_rfc2231_value() and has trailing whitespace removed.
        failobj is returned when there is no such parameter.
        """
        absent = object()
        raw = self.get_param('boundary', absent)
        if raw is not absent:
            # RFC 2046 says that boundaries may begin but not end in w/s,
            # so only the right-hand side is trimmed.
            return utils.collapse_rfc2231_value(raw).rstrip()
        return failobj
				766
    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        The boundary is stored wrapped in double quotes.  An existing
        boundary parameter is replaced where it stands; if there is none, the
        new one is appended after the other parameters.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted))
        # The replacement value is the same for every Content-Type header,
        # so build it once.  A parameter whose value is the empty string is
        # a bare attribute and is written without '='.
        new_value = SEMISPACE.join(
            [k if v == '' else '%s=%s' % (k, v) for k, v in newparams])
        # Replace the existing Content-Type header with the new value,
        # leaving every other header, and the header order, untouched.
        self._headers = [
            (h, new_value) if h.lower() == 'content-type' else (h, v)
            for h, v in self._headers]
				811
    def get_content_charset(self, failobj=None):
        """Return the lower-cased charset parameter, or failobj.

        The `charset' parameter is looked up through get_param() with its
        default header.  failobj is returned when the parameter is absent,
        and also when its value cannot be encoded as us-ascii.
        """
        absent = object()
        value = self.get_param('charset', absent)
        if value is absent:
            return failobj
        if isinstance(value, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            # Item 0 of the tuple names the encoding (us-ascii when empty)
            # and item 2 holds the text.
            codec = value[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                value = value[2].encode('raw-unicode-escape').decode(codec)
            except (LookupError, UnicodeError):
                # Fall back to the text exactly as it was given.
                value = value[2]
        # charset characters must be in us-ascii range
        try:
            value.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
				841
    def get_charsets(self, failobj=None):
        """Return one charset entry for every part yielded by walk().

        Each entry is whatever get_content_charset(failobj) returns for that
        part: the part's charset string, or failobj (None by default) when
        that lookup yields no charset.

        Entries appear in the order walk() produces the parts.
        """
        parts = self.walk()
        return [each.get_content_charset(failobj) for each in parts]
				859
				860	# I.e. def walk(self): ...
				861	from email.iterators import walk