Blame - Lib/email/Generator.py - platform/external/python/cpython3

blob: dbbcabc3fd9a7f97d1ecbe14cd4763825f047b9a [file] [log] [blame]

Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	1	# Copyright (C) 2001,2002 Python Software Foundation
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	2	# Author: barry@zope.com (Barry Warsaw)
				3
				4	"""Classes to generate plain text from a message object tree.
				5	"""
				6
				7	import time
				8	import re
				9	import random
				10
				11	from types import ListType, StringType
				12	from cStringIO import StringIO
				13
# String constants shared by the header-folding and multipart code below.
EMPTYSTRING = ''
SEMISPACE = '; '
# A backslash followed by a pipe.  Written as a raw string so the backslash
# is not read as the start of an (unrecognized) escape sequence.
BAR = r'\|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches the literal text "From " at the start of any line of a string.
fcre = re.compile(r'^From ', re.MULTILINE)
				24
				25
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	26
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces), than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        the header is split on whitespace instead; a header with no usable
        split point is left alone.  Set to zero to disable wrapping headers.
        Default is 78, as recommended (but not required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # Consulted by _write_headers() when deciding whether to emit the
        # MIME-Version: header.
        # NOTE(review): nothing in this class ever clears this flag, so the
        # MIME-Version: suppression in _write_headers() never triggers.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that best matches msg's content type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.  Handler names use
            # underscores where the MIME type has a slash or a dash.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, text):
        """Return text folded so its lines tend to fit in maxheaderlen.

        text is a complete "Name: value" header, possibly already containing
        continuation lines separated by newlines.  Each over-long physical
        line is broken after semicolons if that shortens it, otherwise on
        runs of whitespace.  Lines with no usable split point are returned
        unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        rtn = []
        for line in text.split('\n'):
            splitline = []
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
                splitline.append(line)
                rtn.append(SEMINLTAB.join(splitline))
            else:
                oldlen = len(line)
                # Try to break the line on semicolons, but if that doesn't
                # work, try to split on folding whitespace.
                while len(line) > maxheaderlen:
                    i = line.rfind(';', 0, maxheaderlen)
                    if i < 0:
                        break
                    splitline.append(line[:i])
                    line = line[i+1:].lstrip()
                if len(line) != oldlen:
                    # Splitting on semis worked
                    splitline.append(line)
                    rtn.append(SEMINLTAB.join(splitline))
                    continue
                # Splitting on semis didn't help, so try to split on
                # whitespace.  The capturing group keeps the whitespace runs,
                # so parts alternates token, whitespace, token, ...
                parts = re.split(r'(\s+)', line)
                # Watch out though for "Header: longnonsplittableline"
                if parts[0].endswith(':') and len(parts) == 3:
                    rtn.append(line)
                    continue
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
                while parts:
                    # parts now holds (whitespace, token) pairs
                    len0 = len(parts[0])
                    len1 = len(parts[1])
                    # When nothing has been accumulated yet (the line starts
                    # with whitespace), always take the pair.  Splitting
                    # there would emit an empty chunk, i.e. a blank line in
                    # the middle of the headers, which ends the header block.
                    if acc == 0 or acc + len0 + len1 < maxheaderlen:
                        sublines.append(parts.pop(0))
                        sublines.append(parts.pop(0))
                        acc += len0 + len1
                    else:
                        # Split it here, but don't forget to ignore the
                        # next whitespace-only part
                        splitline.append(EMPTYSTRING.join(sublines))
                        del parts[0]
                        first = parts.pop(0)
                        sublines = [first]
                        acc = len(first)
                splitline.append(EMPTYSTRING.join(sublines))
                rtn.append(NLTAB.join(splitline))
        return NL.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, encoded and From_-mangled.

        Raises TypeError if the (possibly charset-encoded) payload is not a
        plain string.  A payload of None writes nothing.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write the subparts of a multipart msg separated by its boundary.

        When isdigest is true, an extra newline is written between each
        boundary line and the body part that follows it.  The boundary used
        is recorded on msg with set_boundary() whenever it differs from what
        msg.get_boundary() reports.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            # If the boundary had to be generated, record it on the message
            # so that the Content-Type: header (written after this handler
            # returns) names the same boundary as the delimiters below.
            if msg.get_boundary() != boundary:
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            # Flatten each part with its own generator into its own buffer.
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        if isdigest:
            print >> self._fp
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # delimiter; the epilogue handling below supplies it when needed.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write a multipart/digest msg via _handle_multipart()."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each block in msg's payload, separated by a blank line."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the message object that is the payload of msg."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        g(msg.get_payload(), unixfrom=0)
        self._fp.write(s.getvalue())
				326
				327
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	328
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Behaves like the Generator base class, except that every part whose main
    type is not `text' is replaced by a line produced from a format string.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() with one extra optional argument, fmt.

        The generator walks through all subparts of a message.  A subpart
        whose main type is `text' has its decoded payload printed; a
        `multipart' container itself prints nothing.

        For every other subpart, fmt is printed in place of the payload.  It
        is a %(keyword)s style format string, expanded with these keywords:

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        Passing fmt=None (the default) selects

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for other parts."""
        for subpart in msg.walk():
            # A part without a main type is treated as text.
            main = subpart.get_main_type('text')
            if main == 'multipart':
                # Containers print nothing themselves.
                continue
            if main == 'text':
                print >> self, subpart.get_payload(decode=1)
                continue
            # Anything else is summarized through the format string.
            fields = {
                'type'       : subpart.get_type('[no MIME type]'),
                'maintype'   : subpart.get_main_type('[no main MIME type]'),
                'subtype'    : subpart.get_subtype('[no sub-MIME type]'),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print >> self, self._fmt % fields
				382
				383
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	384
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	385	# Helper
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	386	def _make_boundary(text=None):
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	387	# Craft a random boundary. If text is given, ensure that the chosen
				388	# boundary doesn't appear in the text.
				389	boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
				390	if text is None:
				391	return boundary
				392	b = boundary
				393	counter = 0
				394	while 1:
				395	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
				396	if not cre.search(text):
				397	break
				398	b = boundary + '.' + str(counter)
				399	counter += 1
				400	return b