Blame - Lib/email/Generator.py - platform/external/python/cpython2

blob: c31dc803ec7de694a7ead0af14b34fe138f9b374 [file] [log] [blame]

Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	1	# Copyright (C) 2001 Python Software Foundation
				2	# Author: barry@zope.com (Barry Warsaw)
				3
				4	"""Classes to generate plain text from a message object tree.
				5	"""
				6
				7	import time
				8	import re
				9	import random
				10
				11	from types import ListType, StringType
				12	from cStringIO import StringIO
				13
				14	# Intrapackage imports
				15	import Message
				16	import Errors
				17
# String constants used when assembling and folding header text.
SEMISPACE = '; '
# NOTE(review): as written this is a two-character string (backslash + bar),
# because '\|' is not a recognised escape sequence.  It may be an artifact of
# how this listing was rendered -- confirm whether a plain '|' was intended.
BAR = '\|'
# Joins the main type and subtype when building a _handle_*() method name.
UNDERSCORE = '_'
NL = '\n'
# Separator placed between the pieces of a header folded on semicolons:
# the semicolon, a newline, then a tab that starts the continuation line.
SEMINLTAB = ';\n\t'
# A hard tab is counted as this many columns when measuring header width.
SPACE8 = ' ' * 8

# Matches the literal 'From ' at the start of any line of a body; used to
# escape such lines when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
				26
				27
				28
				29	class Generator:
				30	"""Generates output from a Message object tree.
				31
				32	This basic generator writes the message to the given file object as plain
				33	text.
				34	"""
				35	#
				36	# Public interface
				37	#
				38
				39	def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
				40	"""Create the generator for message flattening.
				41
				42	outfp is the output file-like object for writing the message to. It
				43	must have a write() method.
				44
				45	Optional mangle_from_ is a flag that, when true, escapes From_ lines
				46	in the body of the message by putting a `>' in front of them.
				47
				48	Optional maxheaderlen specifies the longest length for a non-continued
				49	header. When a header line is longer (in characters, with tabs
				50	expanded to 8 spaces), than maxheaderlen, the header will be broken on
				51	semicolons and continued as per RFC 2822. If no semicolon is found,
				52	then the header is left alone. Set to zero to disable wrapping
				53	headers. Default is 78, as recommended (but not required by RFC
				54	2822.
				55	"""
				56	self._fp = outfp
				57	self._mangle_from_ = mangle_from_
				58	self.__first = 1
				59	self.__maxheaderlen = maxheaderlen
				60
				61	def write(self, s):
				62	# Just delegate to the file object
				63	self._fp.write(s)
				64
				65	def __call__(self, msg, unixfrom=0):
				66	"""Print the message object tree rooted at msg to the output file
				67	specified when the Generator instance was created.
				68
				69	unixfrom is a flag that forces the printing of a Unix From_ delimiter
				70	before the first object in the message tree. If the original message
				71	has no From_ delimiter, a `standard' one is crafted. By default, this
				72	is 0 to inhibit the printing of any From_ delimiter.
				73
				74	Note that for subobjects, no From_ line is printed.
				75	"""
				76	if unixfrom:
				77	ufrom = msg.get_unixfrom()
				78	if not ufrom:
				79	ufrom = 'From nobody ' + time.ctime(time.time())
				80	print >> self._fp, ufrom
				81	self._write(msg)
				82
				83	#
				84	# Protected interface - undocumented ;/
				85	#
				86
				87	def _write(self, msg):
				88	# We can't write the headers yet because of the following scenario:
				89	# say a multipart message includes the boundary string somewhere in
				90	# its body. We'd have to calculate the new boundary /before/ we write
				91	# the headers so that we can write the correct Content-Type:
				92	# parameter.
				93	#
				94	# The way we do this, so as to make the _handle_*() methods simpler,
				95	# is to cache any subpart writes into a StringIO. The we write the
				96	# headers and the StringIO contents. That way, subpart handlers can
				97	# Do The Right Thing, and can still modify the Content-Type: header if
				98	# necessary.
				99	oldfp = self._fp
				100	try:
				101	self._fp = sfp = StringIO()
				102	self._dispatch(msg)
				103	finally:
				104	self._fp = oldfp
				105	# Write the headers. First we see if the message object wants to
				106	# handle that itself. If not, we'll do it generically.
				107	meth = getattr(msg, '_write_headers', None)
				108	if meth is None:
				109	self._write_headers(msg)
				110	else:
				111	meth(self)
				112	self._fp.write(sfp.getvalue())
				113
				114	def _dispatch(self, msg):
				115	# Get the Content-Type: for the message, then try to dispatch to
				116	# self._handle_maintype_subtype(). If there's no handler for the full
				117	# MIME type, then dispatch to self._handle_maintype(). If that's
				118	# missing too, then dispatch to self._writeBody().
				119	ctype = msg.get_type()
				120	if ctype is None:
				121	# No Content-Type: header so try the default handler
				122	self._writeBody(msg)
				123	else:
				124	# We do have a Content-Type: header.
				125	specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
				126	meth = getattr(self, '_handle_' + specific, None)
				127	if meth is None:
				128	generic = msg.get_main_type().replace('-', '_')
				129	meth = getattr(self, '_handle_' + generic, None)
				130	if meth is None:
				131	meth = self._writeBody
				132	meth(msg)
				133
				134	#
				135	# Default handlers
				136	#
				137
				138	def _write_headers(self, msg):
				139	for h, v in msg.items():
				140	# We only write the MIME-Version: header for the outermost
				141	# container message. Unfortunately, we can't use same technique
				142	# as for the Unix-From above because we don't know when
				143	# MIME-Version: will occur.
				144	if h.lower() == 'mime-version' and not self.__first:
				145	continue
				146	# RFC 2822 says that lines SHOULD be no more than maxheaderlen
				147	# characters wide, so we're well within our rights to split long
				148	# headers.
				149	text = '%s: %s' % (h, v)
				150	if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
				151	text = self._split_header(text)
				152	print >> self._fp, text
				153	# A blank line always separates headers from body
				154	print >> self._fp
				155
				156	def _split_header(self, text):
				157	maxheaderlen = self.__maxheaderlen
				158	# Find out whether any lines in the header are really longer than
				159	# maxheaderlen characters wide. There could be continuation lines
				160	# that actually shorten it. Also, replace hard tabs with 8 spaces.
				161	lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
				162	for line in lines:
				163	if len(line) > maxheaderlen:
				164	break
				165	else:
				166	# No line was actually longer than maxheaderlen characters, so
				167	# just return the original unchanged.
				168	return text
				169	rtn = []
				170	for line in text.split('\n'):
				171	# Short lines can remain unchanged
				172	if len(line.replace('\t', SPACE8)) <= maxheaderlen:
				173	rtn.append(line)
				174	else:
				175	# Try to break the line on semicolons, but if that doesn't
				176	# work, then just leave it alone.
				177	while len(text) > maxheaderlen:
				178	i = text.rfind(';', 0, maxheaderlen)
				179	if i < 0:
				180	rtn.append(text)
				181	break
				182	rtn.append(text[:i])
				183	text = text[i+1:].lstrip()
				184	rtn.append(text)
				185	return SEMINLTAB.join(rtn)
				186
				187	#
				188	# Handlers for writing types and subtypes
				189	#
				190
				191	def _handle_text(self, msg):
				192	payload = msg.get_payload()
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	193	if payload is None:
				194	return
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	195	if not isinstance(payload, StringType):
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	196	raise TypeError, 'string payload expected: %s' % type(payload)
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	197	if self._mangle_from_:
				198	payload = fcre.sub('>From ', payload)
				199	self._fp.write(payload)
				200
				201	# Default body handler
				202	_writeBody = _handle_text
				203
				204	def _handle_multipart(self, msg, isdigest=0):
				205	# The trick here is to write out each part separately, merge them all
				206	# together, and then make sure that the boundary we've chosen isn't
				207	# present in the payload.
				208	msgtexts = []
				209	for part in msg.get_payload():
				210	s = StringIO()
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	211	g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	212	g(part, unixfrom=0)
				213	msgtexts.append(s.getvalue())
				214	# Now make sure the boundary we've selected doesn't appear in any of
				215	# the message texts.
				216	alltext = NL.join(msgtexts)
				217	# BAW: What about boundaries that are wrapped in double-quotes?
				218	boundary = msg.get_boundary(failobj=_make_boundary(alltext))
				219	# If we had to calculate a new boundary because the body text
				220	# contained that string, set the new boundary. We don't do it
				221	# unconditionally because, while set_boundary() preserves order, it
				222	# doesn't preserve newlines/continuations in headers. This is no big
				223	# deal in practice, but turns out to be inconvenient for the unittest
				224	# suite.
				225	if msg.get_boundary() <> boundary:
				226	msg.set_boundary(boundary)
				227	# Write out any preamble
				228	if msg.preamble is not None:
				229	self._fp.write(msg.preamble)
				230	# First boundary is a bit different; it doesn't have a leading extra
				231	# newline.
				232	print >> self._fp, '--' + boundary
				233	if isdigest:
				234	print >> self._fp
				235	# Join and write the individual parts
				236	joiner = '\n--' + boundary + '\n'
				237	if isdigest:
				238	# multipart/digest types effectively add an extra newline between
				239	# the boundary and the body part.
				240	joiner += '\n'
				241	self._fp.write(joiner.join(msgtexts))
				242	print >> self._fp, '\n--' + boundary + '--',
				243	# Write out any epilogue
				244	if msg.epilogue is not None:
				245	self._fp.write(msg.epilogue)
				246
				247	def _handle_multipart_digest(self, msg):
				248	self._handle_multipart(msg, isdigest=1)
				249
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	250	def _handle_message_delivery_status(self, msg):
				251	# We can't just write the headers directly to self's file object
				252	# because this will leave an extra newline between the last header
				253	# block and the boundary. Sigh.
				254	blocks = []
				255	for part in msg.get_payload():
				256	s = StringIO()
				257	g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
				258	g(part, unixfrom=0)
				259	text = s.getvalue()
				260	lines = text.split('\n')
				261	# Strip off the unnecessary trailing empty line
				262	if lines and lines[-1] == '':
				263	blocks.append(NL.join(lines[:-1]))
				264	else:
				265	blocks.append(text)
				266	# Now join all the blocks with an empty line. This has the lovely
				267	# effect of separating each block with an empty line, but not adding
				268	# an extra one after the last one.
				269	self._fp.write(NL.join(blocks))
				270
				271	def _handle_message(self, msg):
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	272	s = StringIO()
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	273	g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	274	# A message/rfc822 should contain a scalar payload which is another
				275	# Message object. Extract that object, stringify it, and write that
				276	# out.
				277	g(msg.get_payload(), unixfrom=0)
				278	self._fp.write(s.getvalue())
				279
				280
				281
				282	class DecodedGenerator(Generator):
				283	"""Generator a text representation of a message.
				284
				285	Like the Generator base class, except that non-text parts are substituted
				286	with a format string representing the part.
				287	"""
				288	def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
				289	"""Like Generator.__init__() except that an additional optional
				290	argument is allowed.
				291
				292	Walks through all subparts of a message. If the subpart is of main
				293	type `text', then it prints the decoded payload of the subpart.
				294
				295	Otherwise, fmt is a format string that is used instead of the message
				296	payload. fmt is expanded with the following keywords (in
				297	%(keyword)s format):
				298
				299	type : Full MIME type of the non-text part
				300	maintype : Main MIME type of the non-text part
				301	subtype : Sub-MIME type of the non-text part
				302	filename : Filename of the non-text part
				303	description: Description associated with the non-text part
				304	encoding : Content transfer encoding of the non-text part
				305
				306	The default value for fmt is None, meaning
				307
				308	[Non-text (%(type)s) part of message omitted, filename %(filename)s]
				309	"""
				310	Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
				311	if fmt is None:
				312	fmt = ('[Non-text (%(type)s) part of message omitted, '
				313	'filename %(filename)s]')
				314	self._fmt = fmt
				315
				316	def _dispatch(self, msg):
				317	for part in msg.walk():
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	318	maintype = part.get_main_type('text')
				319	if maintype == 'text':
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	320	print >> self, part.get_payload(decode=1)
Barry Warsaw	b384e01	2001-09-26 05:32:41 +0000	[diff] [blame^]	321	elif maintype == 'multipart':
				322	# Just skip this
				323	pass
Barry Warsaw	ba92580	2001-09-23 03:17:28 +0000	[diff] [blame]	324	else:
				325	print >> self, self._fmt % {
				326	'type' : part.get_type('[no MIME type]'),
				327	'maintype' : part.get_main_type('[no main MIME type]'),
				328	'subtype' : part.get_subtype('[no sub-MIME type]'),
				329	'filename' : part.get_filename('[no filename]'),
				330	'description': part.get('Content-Description',
				331	'[no description]'),
				332	'encoding' : part.get('Content-Transfer-Encoding',
				333	'[no encoding]'),
				334	}
				335
				336
				337
				338	# Helper
				339	def _make_boundary(self, text=None):
				340	# Craft a random boundary. If text is given, ensure that the chosen
				341	# boundary doesn't appear in the text.
				342	boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
				343	if text is None:
				344	return boundary
				345	b = boundary
				346	counter = 0
				347	while 1:
				348	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
				349	if not cre.search(text):
				350	break
				351	b = boundary + '.' + str(counter)
				352	counter += 1
				353	return b