Blame - Lib/email/Generator.py - platform/external/python/cpython2

blob: e969d00d89ed19792f4ba48fe5ab1b3facb9e4dd [file] [log] [blame]

Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	1	# Copyright (C) 2001 Python Software Foundation
				2	# Author: barry@zope.com (Barry Warsaw)
				3
				4	"""Classes to generate plain text from a message object tree.
				5	"""
				6
				7	import time
				8	import re
				9	import random
				10
				11	from types import ListType, StringType
				12	from cStringIO import StringIO
				13
				14	# Intrapackage imports
				15	import Message
				16	import Errors
				17
# Small string constants used when joining and folding text.
EMPTYSTRING = ''
SEMISPACE = '; '
# A single vertical bar (the backslash-escaped form was two characters long
# and relied on an invalid escape sequence).
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
# Joiners for folded header lines: continuation lines start with a hard tab.
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
# Width used when measuring a hard tab in a header line.
SPACE8 = ' ' * 8

# Matches the literal "From " at the start of any line of a multi-line string.
fcre = re.compile(r'^From ', re.MULTILINE)
				28
				29
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	30
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will be broken on
        semicolons and continued as per RFC 2822.  If no semicolon is found,
        the header is broken on whitespace instead; a header whose value is a
        single unbreakable token is left alone.  Set to zero to disable
        wrapping headers.  Default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def __call__(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # Craft a delimiter stamped with the current local time
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_maintype_subtype().  If there's no handler for the full
        # MIME type, then dispatch to self._handle_maintype().  If that's
        # missing too, then dispatch to self._writeBody().
        ctype = msg.get_type()
        if ctype is None:
            # No Content-Type: header so try the default handler
            self._writeBody(msg)
        else:
            # We do have a Content-Type: header.  Method names can't contain
            # `/' or `-', so both are mapped to underscores.
            specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
            meth = getattr(self, '_handle_' + specific, None)
            if meth is None:
                generic = msg.get_main_type().replace('-', '_')
                meth = getattr(self, '_handle_' + generic, None)
                if meth is None:
                    meth = self._writeBody
            meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write each header of msg, then the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            # NOTE(review): __first is set to 1 in __init__ and is not
            # changed anywhere else in this class, so this skip does not
            # appear to ever trigger -- confirm the intent.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, text):
        """Return text (a complete "Name: value" header) folded to fit.

        If no physical line of text is wider than the maximum header length
        (with hard tabs counted as 8 spaces), text is returned unchanged.
        Otherwise the header is folded as a whole: first at semicolons, with
        the pieces rejoined by ";\\n\\t"; if no usable semicolon is found, at
        runs of whitespace, with the pieces rejoined by "\\n\\t".  A header of
        the form "Name: unbreakabletoken" is returned unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # Fold the header exactly once, starting from an empty result.  The
        # splitting below always works on the complete header text, so
        # nothing may be collected beforehand or it would be emitted twice.
        rtn = []
        oldlen = len(text)
        # Try to break the line on semicolons, but if that doesn't
        # work, try to split on folding whitespace.
        while len(text) > maxheaderlen:
            i = text.rfind(';', 0, maxheaderlen)
            if i < 0:
                break
            rtn.append(text[:i])
            text = text[i+1:].lstrip()
        if len(text) != oldlen:
            # Splitting on semis worked
            rtn.append(text)
            return SEMINLTAB.join(rtn)
        # Splitting on semis didn't help, so try to split on
        # whitespace.  The capturing group keeps the whitespace runs, so
        # parts alternates token, whitespace, token, ...
        parts = re.split(r'(\s+)', text)
        # Watch out though for "Header: longnonsplittableline"
        if parts[0].endswith(':') and len(parts) == 3:
            return text
        first = parts.pop(0)
        sublines = [first]
        acc = len(first)
        while parts:
            # parts now holds (whitespace, token) pairs
            len0 = len(parts[0])
            len1 = len(parts[1])
            if acc + len0 + len1 < maxheaderlen:
                sublines.append(parts.pop(0))
                sublines.append(parts.pop(0))
                acc += len0 + len1
            else:
                # Split it here, but don't forget to ignore the
                # next whitespace-only part
                rtn.append(EMPTYSTRING.join(sublines))
                del parts[0]
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
        rtn.append(EMPTYSTRING.join(sublines))
        return NLTAB.join(rtn)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write msg's string payload, escaping From_ lines if requested.

        Nothing is written when the payload is None.  Raises TypeError when
        the payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg, isdigest=0):
        """Write the subparts of msg separated by its boundary lines.

        When isdigest is true an extra blank line is written after each
        boundary that precedes a part.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        if isdigest:
            print >> self._fp
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        if isdigest:
            # multipart/digest types effectively add an extra newline between
            # the boundary and the body part.
            joiner += '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # boundary; one is added below only if an epilogue needs it.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_multipart_digest(self, msg):
        """Write msg as a multipart with digest-style part separation."""
        self._handle_multipart(msg, isdigest=1)

    def _handle_message_delivery_status(self, msg):
        """Write each subpart of msg as a block, blocks separated by NL."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the single message object that is msg's payload."""
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        # A message/rfc822 should contain a scalar payload which is another
        # Message object.  Extract that object, stringify it, and write that
        # out.
        g(msg.get_payload(), unixfrom=0)
        self._fp.write(s.getvalue())
				312
				313
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	314
class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Behaves like the Generator base class, but every non-text part is
    replaced in the output by a formatted line describing that part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one extra optional argument.

        Every subpart of the message is visited.  For a subpart whose main
        type is `text', the decoded payload of the subpart is printed.
        Subparts whose main type is `multipart' produce no output themselves.

        For any other subpart, fmt is printed in place of the payload.  fmt
        is a format string expanded with these keywords (written in
        %(keyword)s form):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this format string is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                         'filename %(filename)s]')

    def _dispatch(self, msg):
        """Print decoded text parts and placeholders for all other parts."""
        for part in msg.walk():
            # A part with no main type is treated as text
            maintype = part.get_main_type('text')
            if maintype == 'multipart':
                # Just skip this
                continue
            if maintype == 'text':
                # Printing to self goes through self.write()
                print >> self, part.get_payload(decode=1)
                continue
            # Anything else: describe the part instead of emitting it
            info = {}
            info['type'] = part.get_type('[no MIME type]')
            info['maintype'] = part.get_main_type('[no main MIME type]')
            info['subtype'] = part.get_subtype('[no sub-MIME type]')
            info['filename'] = part.get_filename('[no filename]')
            info['description'] = part.get('Content-Description',
                                           '[no description]')
            info['encoding'] = part.get('Content-Transfer-Encoding',
                                        '[no encoding]')
            print >> self, self._fmt % info
				368
				369
Barry Warsaw	e968ead	2001-10-04 17:05:11 +0000	[diff] [blame]	370
# Helper
def _make_boundary(text=None):
    """Return a randomly generated MIME boundary string.

    The boundary is fifteen `=' characters, the fractional digits of a
    random float, and a trailing `=='.

    If text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line (`--boundary' or `--boundary--' on a line of
    its own): while it does, `.0', `.1', ... is appended to the base
    boundary until no such line is found.
    """
    boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while 1:
        # re.escape() is needed because the suffixed candidates contain `.'
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b