Blame - Lib/email/generator.py - platform/external/python/cpython3

blob: ada14df95275ee4fca5b2a35100c69cf57ec8290 [file] [log] [blame]

Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Classes to generate plain text from a message object tree."""
				6
				7	__all__ = ['Generator', 'DecodedGenerator']
				8
				9	import re
				10	import sys
				11	import time
				12	import random
				13	import warnings
				14
				15	from io import StringIO
				16	from email.header import Header
				17
				18	UNDERSCORE = '_'
				19	NL = '\n'
				20
				21	fcre = re.compile(r'^From ', re.MULTILINE)
				22
				23
				24
				25	class Generator:
				26	"""Generates output from a Message object tree.
				27
				28	This basic generator writes the message to the given file object as plain
				29	text.
				30	"""
				31	#
				32	# Public interface
				33	#
				34
				35	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
				36	"""Create the generator for message flattening.
				37
				38	outfp is the output file-like object for writing the message to. It
				39	must have a write() method.
				40
				41	Optional mangle_from_ is a flag that, when True (the default), escapes
				42	From_ lines in the body of the message by putting a `>' in front of
				43	them.
				44
				45	Optional maxheaderlen specifies the longest length for a non-continued
				46	header. When a header line is longer (in characters, with tabs
				47	expanded to 8 spaces) than maxheaderlen, the header will split as
				48	defined in the Header class. Set maxheaderlen to zero to disable
				49	header wrapping. The default is 78, as recommended (but not required)
				50	by RFC 2822.
				51	"""
				52	self._fp = outfp
				53	self._mangle_from_ = mangle_from_
				54	self._maxheaderlen = maxheaderlen
				55
				56	def write(self, s):
				57	# Just delegate to the file object
				58	self._fp.write(s)
				59
				60	def flatten(self, msg, unixfrom=False):
				61	"""Print the message object tree rooted at msg to the output file
				62	specified when the Generator instance was created.
				63
				64	unixfrom is a flag that forces the printing of a Unix From_ delimiter
				65	before the first object in the message tree. If the original message
				66	has no From_ delimiter, a `standard' one is crafted. By default, this
				67	is False to inhibit the printing of any From_ delimiter.
				68
				69	Note that for subobjects, no From_ line is printed.
				70	"""
				71	if unixfrom:
				72	ufrom = msg.get_unixfrom()
				73	if not ufrom:
				74	ufrom = 'From nobody ' + time.ctime(time.time())
				75	print(ufrom, file=self._fp)
				76	self._write(msg)
				77
				78	def clone(self, fp):
				79	"""Clone this generator with the exact same options."""
				80	return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
				81
				82	#
				83	# Protected interface - undocumented ;/
				84	#
				85
				86	def _write(self, msg):
				87	# We can't write the headers yet because of the following scenario:
				88	# say a multipart message includes the boundary string somewhere in
				89	# its body. We'd have to calculate the new boundary /before/ we write
				90	# the headers so that we can write the correct Content-Type:
				91	# parameter.
				92	#
				93	# The way we do this, so as to make the _handle_*() methods simpler,
				94	# is to cache any subpart writes into a StringIO. The we write the
				95	# headers and the StringIO contents. That way, subpart handlers can
				96	# Do The Right Thing, and can still modify the Content-Type: header if
				97	# necessary.
				98	oldfp = self._fp
				99	try:
				100	self._fp = sfp = StringIO()
				101	self._dispatch(msg)
				102	finally:
				103	self._fp = oldfp
				104	# Write the headers. First we see if the message object wants to
				105	# handle that itself. If not, we'll do it generically.
				106	meth = getattr(msg, '_write_headers', None)
				107	if meth is None:
				108	self._write_headers(msg)
				109	else:
				110	meth(self)
				111	self._fp.write(sfp.getvalue())
				112
				113	def _dispatch(self, msg):
				114	# Get the Content-Type: for the message, then try to dispatch to
				115	# self._handle_<maintype>_<subtype>(). If there's no handler for the
				116	# full MIME type, then dispatch to self._handle_<maintype>(). If
				117	# that's missing too, then dispatch to self._writeBody().
				118	main = msg.get_content_maintype()
				119	sub = msg.get_content_subtype()
				120	specific = UNDERSCORE.join((main, sub)).replace('-', '_')
				121	meth = getattr(self, '_handle_' + specific, None)
				122	if meth is None:
				123	generic = main.replace('-', '_')
				124	meth = getattr(self, '_handle_' + generic, None)
				125	if meth is None:
				126	meth = self._writeBody
				127	meth(msg)
				128
				129	#
				130	# Default handlers
				131	#
				132
				133	def _write_headers(self, msg):
				134	for h, v in msg.items():
				135	print('%s:' % h, end=' ', file=self._fp)
Guido van Rossum	9604e66	2007-08-30 03:46:43 +0000	[diff] [blame]	136	if isinstance(v, Header):
				137	print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	138	else:
				139	# Header's got lots of smarts, so use it.
				140	header = Header(v, maxlinelen=self._maxheaderlen,
				141	header_name=h, continuation_ws='\t')
				142	print(header.encode(), file=self._fp)
				143	# A blank line always separates headers from body
				144	print(file=self._fp)
				145
				146	#
				147	# Handlers for writing types and subtypes
				148	#
				149
				150	def _handle_text(self, msg):
				151	payload = msg.get_payload()
				152	if payload is None:
				153	return
Guido van Rossum	3172c5d	2007-10-16 18:12:55 +0000	[diff] [blame]	154	if not isinstance(payload, str):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	155	raise TypeError('string payload expected: %s' % type(payload))
				156	if self._mangle_from_:
				157	payload = fcre.sub('>From ', payload)
				158	self._fp.write(payload)
				159
				160	# Default body handler
				161	_writeBody = _handle_text
				162
				163	def _handle_multipart(self, msg):
				164	# The trick here is to write out each part separately, merge them all
				165	# together, and then make sure that the boundary we've chosen isn't
				166	# present in the payload.
				167	msgtexts = []
				168	subparts = msg.get_payload()
				169	if subparts is None:
				170	subparts = []
Guido van Rossum	3172c5d	2007-10-16 18:12:55 +0000	[diff] [blame]	171	elif isinstance(subparts, str):
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	172	# e.g. a non-strict parse of a message with no starting boundary.
				173	self._fp.write(subparts)
				174	return
				175	elif not isinstance(subparts, list):
				176	# Scalar payload
				177	subparts = [subparts]
				178	for part in subparts:
				179	s = StringIO()
				180	g = self.clone(s)
				181	g.flatten(part, unixfrom=False)
				182	msgtexts.append(s.getvalue())
				183	# Now make sure the boundary we've selected doesn't appear in any of
				184	# the message texts.
				185	alltext = NL.join(msgtexts)
				186	# BAW: What about boundaries that are wrapped in double-quotes?
				187	boundary = msg.get_boundary(failobj=_make_boundary(alltext))
				188	# If we had to calculate a new boundary because the body text
				189	# contained that string, set the new boundary. We don't do it
				190	# unconditionally because, while set_boundary() preserves order, it
				191	# doesn't preserve newlines/continuations in headers. This is no big
				192	# deal in practice, but turns out to be inconvenient for the unittest
				193	# suite.
				194	if msg.get_boundary() != boundary:
				195	msg.set_boundary(boundary)
				196	# If there's a preamble, write it out, with a trailing CRLF
				197	if msg.preamble is not None:
				198	print(msg.preamble, file=self._fp)
				199	# dash-boundary transport-padding CRLF
				200	print('--' + boundary, file=self._fp)
				201	# body-part
				202	if msgtexts:
				203	self._fp.write(msgtexts.pop(0))
				204	# *encapsulation
				205	# --> delimiter transport-padding
				206	# --> CRLF body-part
				207	for body_part in msgtexts:
				208	# delimiter transport-padding CRLF
				209	print('\n--' + boundary, file=self._fp)
				210	# body-part
				211	self._fp.write(body_part)
				212	# close-delimiter transport-padding
				213	self._fp.write('\n--' + boundary + '--')
				214	if msg.epilogue is not None:
				215	print(file=self._fp)
				216	self._fp.write(msg.epilogue)
				217
				218	def _handle_message_delivery_status(self, msg):
				219	# We can't just write the headers directly to self's file object
				220	# because this will leave an extra newline between the last header
				221	# block and the boundary. Sigh.
				222	blocks = []
				223	for part in msg.get_payload():
				224	s = StringIO()
				225	g = self.clone(s)
				226	g.flatten(part, unixfrom=False)
				227	text = s.getvalue()
				228	lines = text.split('\n')
				229	# Strip off the unnecessary trailing empty line
				230	if lines and lines[-1] == '':
				231	blocks.append(NL.join(lines[:-1]))
				232	else:
				233	blocks.append(text)
				234	# Now join all the blocks with an empty line. This has the lovely
				235	# effect of separating each block with an empty line, but not adding
				236	# an extra one after the last one.
				237	self._fp.write(NL.join(blocks))
				238
				239	def _handle_message(self, msg):
				240	s = StringIO()
				241	g = self.clone(s)
				242	# The payload of a message/rfc822 part should be a multipart sequence
				243	# of length 1. The zeroth element of the list should be the Message
				244	# object for the subpart. Extract that object, stringify it, and
				245	# write it out.
				246	g.flatten(msg.get_payload(0), unixfrom=False)
				247	self._fp.write(s.getvalue())
				248
				249
				250
				251	_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
				252
				253	class DecodedGenerator(Generator):
				254	"""Generator a text representation of a message.
				255
				256	Like the Generator base class, except that non-text parts are substituted
				257	with a format string representing the part.
				258	"""
				259	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
				260	"""Like Generator.__init__() except that an additional optional
				261	argument is allowed.
				262
				263	Walks through all subparts of a message. If the subpart is of main
				264	type `text', then it prints the decoded payload of the subpart.
				265
				266	Otherwise, fmt is a format string that is used instead of the message
				267	payload. fmt is expanded with the following keywords (in
				268	%(keyword)s format):
				269
				270	type : Full MIME type of the non-text part
				271	maintype : Main MIME type of the non-text part
				272	subtype : Sub-MIME type of the non-text part
				273	filename : Filename of the non-text part
				274	description: Description associated with the non-text part
				275	encoding : Content transfer encoding of the non-text part
				276
				277	The default value for fmt is None, meaning
				278
				279	[Non-text (%(type)s) part of message omitted, filename %(filename)s]
				280	"""
				281	Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
				282	if fmt is None:
				283	self._fmt = _FMT
				284	else:
				285	self._fmt = fmt
				286
				287	def _dispatch(self, msg):
				288	for part in msg.walk():
				289	maintype = part.get_content_maintype()
				290	if maintype == 'text':
Guido van Rossum	3172c5d	2007-10-16 18:12:55 +0000	[diff] [blame]	291	print(part.get_payload(decode=False), file=self)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	292	elif maintype == 'multipart':
				293	# Just skip this
				294	pass
				295	else:
				296	print(self._fmt % {
				297	'type' : part.get_content_type(),
				298	'maintype' : part.get_content_maintype(),
				299	'subtype' : part.get_content_subtype(),
				300	'filename' : part.get_filename('[no filename]'),
				301	'description': part.get('Content-Description',
				302	'[no description]'),
				303	'encoding' : part.get('Content-Transfer-Encoding',
				304	'[no encoding]'),
				305	}, file=self)
				306
				307
				308
				309	# Helper
Christian Heimes	a37d4c6	2007-12-04 23:02:19 +0000	[diff] [blame]	310	_width = len(repr(sys.maxsize-1))
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	311	_fmt = '%%0%dd' % _width
				312
				313	def _make_boundary(text=None):
				314	# Craft a random boundary. If text is given, ensure that the chosen
				315	# boundary doesn't appear in the text.
Christian Heimes	a37d4c6	2007-12-04 23:02:19 +0000	[diff] [blame]	316	token = random.randrange(sys.maxsize)
Guido van Rossum	8b3febe	2007-08-30 01:15:14 +0000	[diff] [blame]	317	boundary = ('=' * 15) + (_fmt % token) + '=='
				318	if text is None:
				319	return boundary
				320	b = boundary
				321	counter = 0
				322	while True:
				323	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
				324	if not cre.search(text):
				325	break
				326	b = boundary + '.' + str(counter)
				327	counter += 1
				328	return b