Blame - Lib/rfc822.py - platform/external/python/cpython3

blob: 292b3c96ee5498ecd98a01510fc73363da56bcd8 [file] [log] [blame]

Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	1	"""RFC-822 message manipulation class.
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	2
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	3	XXX This is only a very rough sketch of a full RFC-822 parser;
				4	in particular the tokenizing of addresses does not adhere to all the
				5	quoting rules.
				6
				7	Directions for use:
				8
				9	To create a Message object: first open a file, e.g.:
				10	fp = open(file, 'r')
Guido van Rossum	c7bb857	1998-06-10 21:31:01 +0000	[diff] [blame]	11	You can use any other legal way of getting an open file object, e.g. use
				12	sys.stdin or call os.popen().
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	13	Then pass the open file object to the Message() constructor:
				14	m = Message(fp)
				15
Guido van Rossum	e894fc0	1998-06-11 13:58:40 +0000	[diff] [blame]	16	This class can work with any input object that supports a readline
				17	method. If the input object has seek and tell capability, the
				18	rewindbody method will work; also illegal lines will be pushed back
				19	onto the input stream. If the input object lacks seek but has an
				20	`unread' method that can push back a line of input, Message will use
				21	that to push back illegal lines. Thus this class can be used to parse
				22	messages coming from a buffered stream.
Guido van Rossum	c7bb857	1998-06-10 21:31:01 +0000	[diff] [blame]	23
				24	The optional `seekable' argument is provided as a workaround for
				25	certain stdio libraries in which tell() discards buffered data before
				26	discovering that the lseek() system call doesn't work. For maximum
				27	portability, you should set the seekable argument to zero to prevent
				28	that initial tell() when passing in an unseekable object such as
				29	a file object created from a socket object. If it is 1 on entry --
				30	which it is by default -- the tell() method of the open file object is
				31	called once; if this raises an exception, seekable is reset to 0. For
				32	other nonzero values of seekable, this test is not made.
				33
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	34	To get the text of a particular header there are several methods:
				35	str = m.getheader(name)
				36	str = m.getrawheader(name)
				37	where name is the name of the header, e.g. 'Subject'.
				38	The difference is that getheader() strips the leading and trailing
				39	whitespace, while getrawheader() doesn't. Both functions retain
				40	embedded whitespace (including newlines) exactly as they are
				41	specified in the header, and leave the case of the text unchanged.
				42
				43	For addresses and address lists there are functions
				44	realname, mailaddress = m.getaddr(name) and
				45	list = m.getaddrlist(name)
				46	where the latter returns a list of (realname, mailaddr) tuples.
				47
				48	There is also a method
				49	time = m.getdate(name)
				50	which parses a Date-like field and returns a time-compatible tuple,
				51	i.e. a tuple such as returned by time.localtime() or accepted by
				52	time.mktime().
				53
				54	See the class definition for lower level access methods.
				55
				56	There are also some utility functions here.
				57	"""
Guido van Rossum	4d4ab92	1998-06-16 22:27:09 +0000	[diff] [blame]	58	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	59
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	60	import string
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	61	import time
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	62
				63
# The two line values (CRLF and bare LF) that Message.islast() accepts as the
# end of the headers; kept as a module-level tuple so the check is a single
# membership test.
_blanklines = ('\r\n', '\n') # Optimization for islast()
Guido van Rossum	92457b9	1995-06-22 19:06:57 +0000	[diff] [blame]	65
				66
class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from `fp' when the instance is created.  They
    are kept twice: self.headers is the list of raw header lines in file
    order, and self.dict maps each lowercased header name to its stripped
    value (the last occurrence wins).
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is
        exactly 1, fp.tell() is tried once and seekable is reset to 0 when
        it fails; for any other nonzero value that probe is skipped.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except Exception:
                # Any failure here just means "not seekable"; unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input was found not to be seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = rawlines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit unread() pushback; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                startofline = tell()
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                rawlines.append(line)
                joined = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = joined.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                rawlines.append(line)
                # Skip only the name and the colon; strip() removes any
                # following blank.  (Skipping one extra character lost
                # the first value character of "Name:value".)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \r\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A new, different header ends the current match.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop the header name and its colon from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the last
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                # Continuation line: keep it as part of the current header.
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers with a comma.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime().
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds last header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if not hit:
                kept.append(line)
        # Update in place so other references to the list stay valid.
        self.headers[:] = kept

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined into one string."""
        return ''.join(self.headers)
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	418
				419
				420	# Utility functions
				421	# -----------------
				422
Guido van Rossum	be7c45e	1997-11-22 21:49:19 +0000	[diff] [blame]	423	# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	424	# XXX The inverses of the parse functions may also be useful.
				425
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	426
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes has the quotes removed and the
    backslash escapes for backslash and double quote undone, so that
    unquote(quote(s)) == s.  A string wrapped in angle brackets just has
    the brackets removed.  Anything else, including strings shorter than
    two characters, is returned unchanged.
    """
    if len(str) > 1:
        if str[0] == '"' and str[-1:] == '"':
            # Undo the doubled backslashes first, then the escaped quotes;
            # this order reverses what quote() produces.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str[0] == '<' and str[-1:] == '>':
            return str[1:-1]
    return str
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	435
				436
def quote(str):
    """Add quotes around a string.

    Backslashes are doubled and double quotes are backslash-escaped
    before the result is wrapped in double quotes.
    """
    # Escape backslashes first so the backslashes added for the quotes
    # are not themselves doubled.
    return '"%s"' % str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	446
Guido van Rossum	be7c45e	1997-11-22 21:49:19 +0000	[diff] [blame]	447
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when nothing
    can be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossum	be7c45e	1997-11-22 21:49:19 +0000	[diff] [blame]	456
				457
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser walks self.field with the cursor self.pos; comments met
    along the way are collected in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments are consumed and
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where getaddress() yields nothing.
        """
        # Iterative rather than recursive so that very long address
        # lists cannot exhaust the interpreter's recursion limit.
        result = []
        ad = self.getaddress()
        while ad:
            result = result + ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing
        was found, one entry for a single address, possibly several for
        a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                # gotonext() may have run off the end of the field (an
                # unterminated group), so check bounds before indexing.
                if self.pos < len(self.field) and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the parser keeps moving.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on '<' or no addrspec is found.
        """
        if self.field[self.pos] != '<':
            return None

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # The domain after '@' in a route is parsed and discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part, followed by '@' and the domain when an
        '@' is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The delimiters themselves are not part of the returned string;
        a backslash makes the following character literal.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one as is.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character in
        self.atomends and returns them as a string.
        """
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else: plist.append(self.getatom())

        return plist
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	714
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    """

    def __init__(self, field):
        """Parse `field' into self.addresslist.

        A false `field' (None or an empty string) gives an empty list.
        """
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        """Return the number of addresses held."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by ", "."""
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        """Return a new AddressList that is the union of self and other."""
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __sub__(self, other):
        """Return a new AddressList of the addresses in self but not in other."""
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __getitem__(self, index):
        """Return the address (or slice of addresses) at `index'."""
        # Make indexing, slices, and 'in' work
        # (this used to read the nonexistent attribute self.addrlist)
        return self.addresslist[index]
				750
def dump_address_pair(pair):
    """Render a (name, address) pair as a single address string.

    When the name part is empty or None only the bare address is
    returned; otherwise the result is `"name" <address>'.  The name is
    inserted verbatim between the double quotes.
    """
    name = pair[0]
    address = pair[1]
    if not name:
        return address
    return '"' + name + '" <' + address + '>'
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	757
				758	# Parse a date field
				759
Guido van Rossum	db01ee0	1998-12-23 22:22:10 +0000	[diff] [blame]	760	_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
				761	'aug', 'sep', 'oct', 'nov', 'dec',
				762	'january', 'february', 'march', 'april', 'may', 'june', 'july',
				763	'august', 'september', 'october', 'november', 'december']
				764	_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	765
Guido van Rossum	27cb8a4	1996-11-20 22:12:26 +0000	[diff] [blame]	766	# The timezone table does not include the military time zones defined
				767	# in RFC822, other than Z. According to RFC1123, the description in
				768	# RFC822 gets the signs wrong, so we can't rely on any such time
				769	# zones. RFC1123 recommends that numeric timezone indicators be used
				770	# instead of timezone names.
				771
				772	_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum	67133e2	1998-05-18 16:09:10 +0000	[diff] [blame]	773	'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum	45e2fbc	1998-03-26 21:13:24 +0000	[diff] [blame]	774	'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum	67133e2	1998-05-18 16:09:10 +0000	[diff] [blame]	775	'CST': -600, 'CDT': -500, # Central
				776	'MST': -700, 'MDT': -600, # Mountain
				777	'PST': -800, 'PDT': -700 # Pacific
Guido van Rossum	45e2fbc	1998-03-26 21:13:24 +0000	[diff] [blame]	778	}
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	779
Guido van Rossum	27cb8a4	1996-11-20 22:12:26 +0000	[diff] [blame]	780
				781	def parsedate_tz(data):
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	782	"""Convert a date string to a time tuple.
				783
				784	Accounts for military timezones.
				785	"""
				786	data = string.split(data)
Guido van Rossum	db01ee0	1998-12-23 22:22:10 +0000	[diff] [blame]	787	if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	788	# There's a dayname here. Skip it
				789	del data[0]
				790	if len(data) == 3: # RFC 850 date, deprecated
				791	stuff = string.split(data[0], '-')
				792	if len(stuff) == 3:
				793	data = stuff + data[1:]
				794	if len(data) == 4:
				795	s = data[3]
				796	i = string.find(s, '+')
				797	if i > 0:
				798	data[3:] = [s[:i], s[i+1:]]
				799	else:
				800	data.append('') # Dummy tz
				801	if len(data) < 5:
				802	return None
				803	data = data[:5]
				804	[dd, mm, yy, tm, tz] = data
Guido van Rossum	db01ee0	1998-12-23 22:22:10 +0000	[diff] [blame]	805	mm = string.lower(mm)
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	806	if not mm in _monthnames:
Guido van Rossum	db01ee0	1998-12-23 22:22:10 +0000	[diff] [blame]	807	dd, mm = mm, string.lower(dd)
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	808	if not mm in _monthnames:
				809	return None
				810	mm = _monthnames.index(mm)+1
Guido van Rossum	db01ee0	1998-12-23 22:22:10 +0000	[diff] [blame]	811	if dd[-1] == ',':
				812	dd = dd[:-1]
				813	i = string.find(yy, ':')
				814	if i > 0:
				815	yy, tm = tm, yy
				816	if yy[-1] == ',':
				817	yy = yy[:-1]
				818	if yy[0] not in string.digits:
				819	yy, tz = tz, yy
				820	if tm[-1] == ',':
				821	tm = tm[:-1]
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	822	tm = string.splitfields(tm, ':')
				823	if len(tm) == 2:
				824	[thh, tmm] = tm
				825	tss = '0'
Guido van Rossum	99e1131	1998-12-23 21:58:38 +0000	[diff] [blame]	826	elif len(tm) == 3:
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	827	[thh, tmm, tss] = tm
Guido van Rossum	99e1131	1998-12-23 21:58:38 +0000	[diff] [blame]	828	else:
				829	return None
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	830	try:
				831	yy = string.atoi(yy)
				832	dd = string.atoi(dd)
				833	thh = string.atoi(thh)
				834	tmm = string.atoi(tmm)
				835	tss = string.atoi(tss)
				836	except string.atoi_error:
				837	return None
Guido van Rossum	a73033f	1998-02-19 00:28:58 +0000	[diff] [blame]	838	tzoffset=None
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	839	tz=string.upper(tz)
				840	if _timezones.has_key(tz):
				841	tzoffset=_timezones[tz]
				842	else:
				843	try:
				844	tzoffset=string.atoi(tz)
				845	except string.atoi_error:
				846	pass
				847	# Convert a timezone offset into seconds ; -0500 -> -18000
Guido van Rossum	a73033f	1998-02-19 00:28:58 +0000	[diff] [blame]	848	if tzoffset:
Guido van Rossum	45e2fbc	1998-03-26 21:13:24 +0000	[diff] [blame]	849	if tzoffset < 0:
				850	tzsign = -1
				851	tzoffset = -tzoffset
				852	else:
				853	tzsign = 1
				854	tzoffset = tzsign * ( (tzoffset/100)3600 + (tzoffset % 100)60)
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	855	tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
				856	return tuple
				857
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	858
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is parsedate_tz() with the trailing time zone offset dropped.
    Whatever parsedate_tz() returns that is not a tuple (None for an
    unparsable date) is passed through unchanged.
    """
    result = parsedate_tz(data)
    if type(result) != type(()):
        return result
    return result[:9]
				865
Guido van Rossum	27cb8a4	1996-11-20 22:12:26 +0000	[diff] [blame]	866
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the date carried
    no zone information.  In the None case the date is interpreted as
    local time, with the DST flag left for mktime() to determine.
    """
    offset = data[9]
    fields = data[:8]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(fields + (-1,))
    # Interpret the fields with DST off, then remove both the date's own
    # offset and the local zone's offset.
    return time.mktime(fields + (0,)) - offset - time.timezone
Guido van Rossum	6cdd7a0	1996-12-12 18:39:54 +0000	[diff] [blame]	875
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; when it is None
    the current time is used.  The result is always expressed in GMT.
    The day and month names are taken from fixed English tables rather
    than from strftime(), so the result does not depend on the locale.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    # gmtime() numbers weekdays from Monday (0) and months from 1.
    dayname = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[weekday]
    monthname = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        dayname, day, monthname, year, hour, minute, second)
				885
Guido van Rossum	b6775db	1994-08-01 11:34:53 +0000	[diff] [blame]	886
				887	# When used as script, run a small test program.
				888	# The first command line argument must be a filename containing one
				889	# message in RFC-822 format.
				890
				891	if __name__ == '__main__':
Guido van Rossum	9ab94c1	1997-12-10 16:17:39 +0000	[diff] [blame]	892	import sys, os
				893	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
				894	if sys.argv[1:]: file = sys.argv[1]
				895	f = open(file, 'r')
				896	m = Message(f)
				897	print 'From:', m.getaddr('from')
				898	print 'To:', m.getaddrlist('to')
				899	print 'Subject:', m.getheader('subject')
				900	print 'Date:', m.getheader('date')
				901	date = m.getdate_tz('date')
				902	if date:
				903	print 'ParsedDate:', time.asctime(date[:-1]),
				904	hhmmss = date[-1]
				905	hhmm, ss = divmod(hhmmss, 60)
				906	hh, mm = divmod(hhmm, 60)
				907	print "%+03d%02d" % (hh, mm),
				908	if ss: print ".%02d" % ss,
				909	print
				910	else:
				911	print 'ParsedDate:', None
				912	m.rewindbody()
				913	n = 0
				914	while f.readline():
				915	n = n + 1
				916	print 'Lines:', n
				917	print '-'*70
				918	print 'len =', len(m)
				919	if m.has_key('Date'): print 'Date =', m['Date']
				920	if m.has_key('X-Nonsense'): pass
				921	print 'keys =', m.keys()
				922	print 'values =', m.values()
				923	print 'items =', m.items()