Blame - Lib/email/Generator.py - platform/external/python/cpython3

blob: 981e0ffd04855ada5d93c19ccb9afb98e799ec2a [file] [log] [blame]

Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	1	# Copyright (C) 2001 Python Software Foundation
				2	# Author: barry@zope.com (Barry Warsaw)
				3
				4	"""Classes to generate plain text from a message object tree.
				5	"""
				6
				7	import time
				8	import re
				9	import random
				10
				11	from types import ListType, StringType
				12	from cStringIO import StringIO
				13
# String constants shared by the generator classes below when assembling
# header lines, multipart bodies, and handler method names.
EMPTYSTRING = ''
SEMISPACE = '; '
# Two characters: a backslash followed by a vertical bar.  The backslash is
# doubled so the value does not depend on Python passing an unrecognized
# escape sequence through unchanged.
BAR = '\\|'
UNDERSCORE = '_'
NL = '\n'
# Separators used when a long header is folded onto continuation lines:
# NLTAB for whitespace folds, SEMINLTAB for folds made after a semicolon.
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
# Width a hard tab is expanded to when measuring header line length.
SPACE8 = ' ' * 8

# Matches the text "From " at the beginning of any line; used to escape such
# lines in message bodies when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
				24
				25
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	26
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If splitting on semicolons
        does not help, the header is split on whitespace instead; a header
        consisting of a name and one unsplittable token is left alone.  Set
        to zero to disable wrapping headers.  Default is 78, as recommended
        (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg's type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.  Turn e.g. "text/x-foo" into
            # "text_x_foo" so it can be used as part of a method name.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            # NOTE(review): __first is set to 1 in __init__ and nothing in
            # this class ever clears it, so this filter does not currently
            # skip anything -- confirm the intended behavior.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, text):
        """Return text folded so its lines fit within maxheaderlen.

        text is a complete "Name: value" header, possibly already containing
        continuation lines.  It is returned unchanged when no line (with tabs
        expanded to 8 spaces) exceeds the limit, or when it consists of a
        header name followed by a single unsplittable token.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        rtn = []
        for line in text.split('\n'):
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                rtn.append(line)
            else:
                # NOTE(review): from here on the code works on the whole
                # header (`text'), not just the current `line', and always
                # returns from this branch.  Any short lines collected above
                # are therefore still part of `text' -- confirm whether
                # per-line splitting was intended.
                oldlen = len(text)
                # Try to break the line on semicolons, but if that doesn't
                # work, try to split on folding whitespace.
                while len(text) > maxheaderlen:
                    i = text.rfind(';', 0, maxheaderlen)
                    if i < 0:
                        break
                    rtn.append(text[:i])
                    text = text[i+1:].lstrip()
                if len(text) != oldlen:
                    # Splitting on semis worked
                    rtn.append(text)
                    return SEMINLTAB.join(rtn)
                # Splitting on semis didn't help, so try to split on
                # whitespace.  The capturing group keeps the whitespace runs,
                # so parts alternates token, whitespace, token, ...
                parts = re.split(r'(\s+)', text)
                # Watch out though for "Header: longnonsplittableline"
                if parts[0].endswith(':') and len(parts) == 3:
                    return text
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
                while parts:
                    # parts[0] is a whitespace run, parts[1] the next token
                    len0 = len(parts[0])
                    len1 = len(parts[1])
                    if acc + len0 + len1 < maxheaderlen:
                        sublines.append(parts.pop(0))
                        sublines.append(parts.pop(0))
                        acc += len0 + len1
                    else:
                        # Split it here, but don't forget to ignore the
                        # next whitespace-only part
                        rtn.append(EMPTYSTRING.join(sublines))
                        del parts[0]
                        first = parts.pop(0)
                        sublines = [first]
                        acc = len(first)
                rtn.append(EMPTYSTRING.join(sublines))
                return NLTAB.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write msg's string payload, escaping From_ lines if requested.

        Nothing is written when the payload is None.  Raises TypeError when
        the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write a multipart body: preamble, delimited parts, and epilogue.

        When isdigest is true an extra blank line is written after each
        boundary line that precedes a part.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        # NOTE(review): the fallback boundary is computed on every call, and
        # a boundary msg already carries is not itself checked against
        # alltext here -- confirm this is acceptable.
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        if isdigest:
            print >> self._fp
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # boundary.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write a multipart/digest body via _handle_multipart()."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each payload part as a block, joined by newlines."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened text of the message contained in msg."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        g(msg.get_payload(), unixfrom=0)
        self._fp.write(s.getvalue())
				308
				309
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	310
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, except that every non-text part is
    replaced by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one extra optional argument, fmt.

        The generator walks through all subparts of a message.  For a subpart
        whose main type is `text', the decoded payload is printed.

        For any other non-multipart subpart, fmt is printed in place of the
        payload.  fmt is a format string expanded with these keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this string is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for other parts."""
        for part in msg.walk():
            # A part with no main type is treated as text.
            maintype = part.get_main_type('text')
            if maintype == 'multipart':
                # Containers produce no output of their own.
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=1)
                continue
            # Anything else is summarized using the format string.
            fields = {
                'type'       : part.get_type('[no MIME type]'),
                'maintype'   : part.get_main_type('[no main MIME type]'),
                'subtype'    : part.get_subtype('[no sub-MIME type]'),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % fields
				364
				365
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	366
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	367	# Helper
				368	def _make_boundary(self, text=None):
				369	# Craft a random boundary. If text is given, ensure that the chosen
				370	# boundary doesn't appear in the text.
				371	boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
				372	if text is None:
				373	return boundary
				374	b = boundary
				375	counter = 0
				376	while 1:
				377	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
				378	if not cre.search(text):
				379	break
				380	b = boundary + '.' + str(counter)
				381	counter += 1
				382	return b