Blame - Lib/email/contentmanager.py - platform/external/python/cpython3

blob: b98ce27184f3062eb5d36b96659ae4cedd3b32a6 [file] [log] [blame]

R David Murray	3da240f	2013-10-16 22:48:40 -0400	[diff] [blame]	1	import binascii
				2	import email.charset
				3	import email.message
				4	import email.errors
				5	from email import quoprimime
				6
				7	class ContentManager:
				8
				9	def __init__(self):
				10	self.get_handlers = {}
				11	self.set_handlers = {}
				12
				13	def add_get_handler(self, key, handler):
				14	self.get_handlers[key] = handler
				15
				16	def get_content(self, msg, args, *kw):
				17	content_type = msg.get_content_type()
				18	if content_type in self.get_handlers:
				19	return self.get_handlers[content_type](msg, args, *kw)
				20	maintype = msg.get_content_maintype()
				21	if maintype in self.get_handlers:
				22	return self.get_handlers[maintype](msg, args, *kw)
				23	if '' in self.get_handlers:
				24	return self.get_handlers[''](msg, args, *kw)
				25	raise KeyError(content_type)
				26
				27	def add_set_handler(self, typekey, handler):
				28	self.set_handlers[typekey] = handler
				29
				30	def set_content(self, msg, obj, args, *kw):
				31	if msg.get_content_maintype() == 'multipart':
				32	# XXX: is this error a good idea or not? We can remove it later,
				33	# but we can't add it later, so do it for now.
				34	raise TypeError("set_content not valid on multipart")
				35	handler = self._find_set_handler(msg, obj)
				36	msg.clear_content()
				37	handler(msg, obj, args, *kw)
				38
				39	def _find_set_handler(self, msg, obj):
				40	full_path_for_error = None
				41	for typ in type(obj).__mro__:
				42	if typ in self.set_handlers:
				43	return self.set_handlers[typ]
				44	qname = typ.__qualname__
				45	modname = getattr(typ, '__module__', '')
				46	full_path = '.'.join((modname, qname)) if modname else qname
				47	if full_path_for_error is None:
				48	full_path_for_error = full_path
				49	if full_path in self.set_handlers:
				50	return self.set_handlers[full_path]
				51	if qname in self.set_handlers:
				52	return self.set_handlers[qname]
				53	name = typ.__name__
				54	if name in self.set_handlers:
				55	return self.set_handlers[name]
				56	if None in self.set_handlers:
				57	return self.set_handlers[None]
				58	raise KeyError(full_path_for_error)
				59
				60
# Module-level ContentManager instance; the get/set handlers defined in the
# rest of this module are registered on it.
raw_data_manager = ContentManager()
				62
				63
				64	def get_text_content(msg, errors='replace'):
				65	content = msg.get_payload(decode=True)
				66	charset = msg.get_param('charset', 'ASCII')
				67	return content.decode(charset, errors=errors)
# Register under the bare 'text' maintype key, which ContentManager.get_content
# consults when no handler exists for the full content type.
raw_data_manager.add_get_handler('text', get_text_content)
				69
				70
				71	def get_non_text_content(msg):
				72	return msg.get_payload(decode=True)
# Register the same handler under each of these maintype keys.
for maintype in 'audio image video application'.split():
    raw_data_manager.add_get_handler(maintype, get_non_text_content)
				75
				76
				77	def get_message_content(msg):
				78	return msg.get_payload(0)
# Register the handler under the full content types 'message/rfc822' and
# 'message/external-body'.
for subtype in 'rfc822 external-body'.split():
    raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
				81
				82
				83	def get_and_fixup_unknown_message_content(msg):
				84	# If we don't understand a message subtype, we are supposed to treat it as
				85	# if it were application/octet-stream, per
				86	# tools.ietf.org/html/rfc2046#section-5.2.4. Feedparser doesn't do that,
				87	# so do our best to fix things up. Note that it is not appropriate to
				88	# model message/partial content as Message objects, so they are handled
				89	# here as well. (How to reassemble them is out of scope for this comment :)
				90	return bytes(msg.get_payload(0))
# Register under the bare 'message' maintype key; full content types that have
# their own handler are matched first by ContentManager.get_content.
raw_data_manager.add_get_handler('message',
                                 get_and_fixup_unknown_message_content)
				93
				94
				95	def _prepare_set(msg, maintype, subtype, headers):
				96	msg['Content-Type'] = '/'.join((maintype, subtype))
				97	if headers:
				98	if not hasattr(headers[0], 'name'):
				99	mp = msg.policy
				100	headers = [mp.header_factory(*mp.header_source_parse([header]))
				101	for header in headers]
				102	try:
				103	for header in headers:
				104	if header.defects:
				105	raise header.defects[0]
				106	msg[header.name] = header
				107	except email.errors.HeaderDefect as exc:
				108	raise ValueError("Invalid header: {}".format(
				109	header.fold(policy=msg.policy))) from exc
				110
				111
				112	def _finalize_set(msg, disposition, filename, cid, params):
				113	if disposition is None and filename is not None:
				114	disposition = 'attachment'
				115	if disposition is not None:
				116	msg['Content-Disposition'] = disposition
				117	if filename is not None:
				118	msg.set_param('filename',
				119	filename,
				120	header='Content-Disposition',
				121	replace=True)
				122	if cid is not None:
				123	msg['Content-ID'] = cid
				124	if params is not None:
				125	for key, value in params.items():
				126	msg.set_param(key, value)
				127
				128
R David Murray	94a7927	2016-09-09 15:00:09 -0400	[diff] [blame^]	129	# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug
				130	# fix in the calculation of unencoded_bytes_per_line). It would be nice to
				131	# drop both this and quoprimime.body_encode in favor of enhanced binascii
				132	# routines that accepted a max_line_length parameter.
R David Murray	3da240f	2013-10-16 22:48:40 -0400	[diff] [blame]	133	def _encode_base64(data, max_line_length):
				134	encoded_lines = []
R David Murray	94a7927	2016-09-09 15:00:09 -0400	[diff] [blame^]	135	unencoded_bytes_per_line = max_line_length // 4 * 3
R David Murray	3da240f	2013-10-16 22:48:40 -0400	[diff] [blame]	136	for i in range(0, len(data), unencoded_bytes_per_line):
				137	thisline = data[i:i+unencoded_bytes_per_line]
				138	encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
				139	return ''.join(encoded_lines)
				140
				141
				142	def _encode_text(string, charset, cte, policy):
				143	lines = string.encode(charset).splitlines()
				144	linesep = policy.linesep.encode('ascii')
				145	def embeded_body(lines): return linesep.join(lines) + linesep
				146	def normal_body(lines): return b'\n'.join(lines) + b'\n'
				147	if cte==None:
				148	# Use heuristics to decide on the "best" encoding.
				149	try:
				150	return '7bit', normal_body(lines).decode('ascii')
				151	except UnicodeDecodeError:
				152	pass
				153	if (policy.cte_type == '8bit' and
				154	max(len(x) for x in lines) <= policy.max_line_length):
				155	return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
				156	sniff = embeded_body(lines[:10])
				157	sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
				158	policy.max_line_length)
				159	sniff_base64 = binascii.b2a_base64(sniff)
				160	# This is a little unfair to qp; it includes lineseps, base64 doesn't.
				161	if len(sniff_qp) > len(sniff_base64):
				162	cte = 'base64'
				163	else:
				164	cte = 'quoted-printable'
				165	if len(lines) <= 10:
				166	return cte, sniff_qp
				167	if cte == '7bit':
				168	data = normal_body(lines).decode('ascii')
				169	elif cte == '8bit':
				170	data = normal_body(lines).decode('ascii', 'surrogateescape')
				171	elif cte == 'quoted-printable':
				172	data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
				173	policy.max_line_length)
				174	elif cte == 'base64':
				175	data = _encode_base64(embeded_body(lines), policy.max_line_length)
				176	else:
				177	raise ValueError("Unknown content transfer encoding {}".format(cte))
				178	return cte, data
				179
				180
				181	def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
				182	disposition=None, filename=None, cid=None,
				183	params=None, headers=None):
				184	_prepare_set(msg, 'text', subtype, headers)
				185	cte, payload = _encode_text(string, charset, cte, msg.policy)
				186	msg.set_payload(payload)
				187	msg.set_param('charset',
				188	email.charset.ALIASES.get(charset, charset),
				189	replace=True)
				190	msg['Content-Transfer-Encoding'] = cte
				191	_finalize_set(msg, disposition, filename, cid, params)
# str objects (and subclasses, via the MRO walk in _find_set_handler) are
# stored with set_text_content.
raw_data_manager.add_set_handler(str, set_text_content)
				193
				194
				195	def set_message_content(msg, message, subtype="rfc822", cte=None,
				196	disposition=None, filename=None, cid=None,
				197	params=None, headers=None):
				198	if subtype == 'partial':
				199	raise ValueError("message/partial is not supported for Message objects")
				200	if subtype == 'rfc822':
				201	if cte not in (None, '7bit', '8bit', 'binary'):
				202	# http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
				203	raise ValueError(
				204	"message/rfc822 parts do not support cte={}".format(cte))
				205	# 8bit will get coerced on serialization if policy.cte_type='7bit'. We
				206	# may end up claiming 8bit when it isn't needed, but the only negative
				207	# result of that should be a gateway that needs to coerce to 7bit
				208	# having to look through the whole embedded message to discover whether
				209	# or not it actually has to do anything.
				210	cte = '8bit' if cte is None else cte
				211	elif subtype == 'external-body':
				212	if cte not in (None, '7bit'):
				213	# http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
				214	raise ValueError(
				215	"message/external-body parts do not support cte={}".format(cte))
				216	cte = '7bit'
				217	elif cte is None:
				218	# http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
				219	# subtypes should be restricted to 7bit, so assume that.
				220	cte = '7bit'
				221	_prepare_set(msg, 'message', subtype, headers)
				222	msg.set_payload([message])
				223	msg['Content-Transfer-Encoding'] = cte
				224	_finalize_set(msg, disposition, filename, cid, params)
# email.message.Message objects (and subclasses, via the MRO walk in
# _find_set_handler) are stored with set_message_content.
raw_data_manager.add_set_handler(email.message.Message, set_message_content)
				226
				227
				228	def set_bytes_content(msg, data, maintype, subtype, cte='base64',
				229	disposition=None, filename=None, cid=None,
				230	params=None, headers=None):
				231	_prepare_set(msg, maintype, subtype, headers)
				232	if cte == 'base64':
				233	data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
				234	elif cte == 'quoted-printable':
				235	# XXX: quoprimime.body_encode won't encode newline characters in data,
				236	# so we can't use it. This means max_line_length is ignored. Another
				237	# bug to fix later. (Note: encoders.quopri is broken on line ends.)
				238	data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
				239	data = data.decode('ascii')
				240	elif cte == '7bit':
				241	# Make sure it really is only ASCII. The early warning here seems
				242	# worth the overhead...if you care write your own content manager :).
				243	data.encode('ascii')
				244	elif cte in ('8bit', 'binary'):
				245	data = data.decode('ascii', 'surrogateescape')
				246	msg.set_payload(data)
				247	msg['Content-Transfer-Encoding'] = cte
				248	_finalize_set(msg, disposition, filename, cid, params)
# All three bytes-like types are stored with set_bytes_content.
for typ in (bytes, bytearray, memoryview):
    raw_data_manager.add_set_handler(typ, set_bytes_content)