Blame - Lib/dos-8x3/rfc822-n.py - platform/external/python/cpython2

blob: ef7ab433ffb39733a21bf2479c3048d4cdcd224b [file] [log] [blame]

Guido van Rossum	aad6761	2000-05-08 17:31:04 +0000	[diff] [blame]	1	"""RFC-822 message manipulation class.
				2
				3	XXX This is only a very rough sketch of a full RFC-822 parser;
				4	in particular the tokenizing of addresses does not adhere to all the
				5	quoting rules.
				6
				7	Directions for use:
				8
				9	To create a Message object: first open a file, e.g.:
				10	fp = open(file, 'r')
				11	You can use any other legal way of getting an open file object, e.g. use
				12	sys.stdin or call os.popen().
				13	Then pass the open file object to the Message() constructor:
				14	m = Message(fp)
				15
				16	This class can work with any input object that supports a readline
				17	method. If the input object has seek and tell capability, the
				18	rewindbody method will work; also illegal lines will be pushed back
				19	onto the input stream. If the input object lacks seek but has an
				20	`unread' method that can push back a line of input, Message will use
				21	that to push back illegal lines. Thus this class can be used to parse
				22	messages coming from a buffered stream.
				23
				24	The optional `seekable' argument is provided as a workaround for
				25	certain stdio libraries in which tell() discards buffered data before
				26	discovering that the lseek() system call doesn't work. For maximum
				27	portability, you should set the seekable argument to zero to prevent
				28	that initial tell() call when passing in an unseekable object such as
				29	a file object created from a socket object. If it is 1 on entry --
				30	which it is by default -- the tell() method of the open file object is
				31	called once; if this raises an exception, seekable is reset to 0. For
				32	other nonzero values of seekable, this test is not made.
				33
				34	To get the text of a particular header there are several methods:
				35	str = m.getheader(name)
				36	str = m.getrawheader(name)
				37	where name is the name of the header, e.g. 'Subject'.
				38	The difference is that getheader() strips the leading and trailing
				39	whitespace, while getrawheader() doesn't. Both functions retain
				40	embedded whitespace (including newlines) exactly as they are
				41	specified in the header, and leave the case of the text unchanged.
				42
				43	For addresses and address lists there are functions
				44	realname, mailaddress = m.getaddr(name) and
				45	list = m.getaddrlist(name)
				46	where the latter returns a list of (realname, mailaddr) tuples.
				47
				48	There is also a method
				49	time = m.getdate(name)
				50	which parses a Date-like field and returns a time-compatible tuple,
				51	i.e. a tuple such as returned by time.localtime() or accepted by
				52	time.mktime().
				53
				54	See the class definition for lower level access methods.
				55
				56	There are also some utility functions here.
				57	"""
				58	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
				59
				60	import string
				61	import time
				62
				63
				64	_blanklines = ('\r\n', '\n') # Optimization for islast()
				65
				66
				67	class Message:
				68	"""Represents a single RFC-822-compliant message."""
				69
				70	def __init__(self, fp, seekable = 1):
				71	"""Initialize the class instance and read the headers."""
				72	if seekable == 1:
				73	# Exercise tell() to make sure it works
				74	# (and then assume seek() works, too)
				75	try:
				76	fp.tell()
				77	except:
				78	seekable = 0
				79	else:
				80	seekable = 1
				81	self.fp = fp
				82	self.seekable = seekable
				83	self.startofheaders = None
				84	self.startofbody = None
				85	#
				86	if self.seekable:
				87	try:
				88	self.startofheaders = self.fp.tell()
				89	except IOError:
				90	self.seekable = 0
				91	#
				92	self.readheaders()
				93	#
				94	if self.seekable:
				95	try:
				96	self.startofbody = self.fp.tell()
				97	except IOError:
				98	self.seekable = 0
				99
				100	def rewindbody(self):
				101	"""Rewind the file to the start of the body (if seekable)."""
				102	if not self.seekable:
				103	raise IOError, "unseekable file"
				104	self.fp.seek(self.startofbody)
				105
				106	def readheaders(self):
				107	"""Read header lines.
				108
				109	Read header lines up to the entirely blank line that
				110	terminates them. The (normally blank) line that ends the
				111	headers is skipped, but not included in the returned list.
				112	If a non-header line ends the headers, (which is an error),
				113	an attempt is made to backspace over it; it is never
				114	included in the returned list.
				115
				116	The variable self.status is set to the empty string if all
				117	went well, otherwise it is an error message.
				118	The variable self.headers is a completely uninterpreted list
				119	of lines contained in the header (so printing them will
				120	reproduce the header exactly as it appears in the file).
				121	"""
				122	self.dict = {}
				123	self.__gamh_cache = {}
				124	self.__gh_cache = {}
				125	self.unixfrom = ''
				126	self.headers = list = []
				127	self.status = ''
				128	headerseen = ""
				129	firstline = 1
				130	startofline = unread = tell = None
				131	if hasattr(self.fp, 'unread'):
				132	unread = self.fp.unread
				133	elif self.seekable:
				134	tell = self.fp.tell
				135	while 1:
				136	if tell:
				137	startofline = tell()
				138	line = self.fp.readline()
				139	if not line:
				140	self.status = 'EOF in headers'
				141	break
				142	# Skip unix From name time lines
				143	if firstline and line[:5] == 'From ':
				144	self.unixfrom = self.unixfrom + line
				145	continue
				146	firstline = 0
				147	if headerseen and line[0] in ' \t':
				148	# It's a continuation line.
				149	list.append(line)
				150	self.__gamh_cache[headerseen].append(line)
				151	x = string.lstrip(
				152	"%s\n %s" % (self.dict[headerseen], string.strip(line)))
				153	self.dict[headerseen] = x
				154	self.__gh_cache[headerseen][-1] = x
				155	continue
				156	elif self.iscomment(line):
				157	# It's a comment. Ignore it.
				158	continue
				159	elif self.islast(line):
				160	# Note! No pushback here! The delimiter line gets eaten.
				161	break
				162	headerseen = self.isheader(line)
				163	if headerseen:
				164	# It's a legal header line, save it.
				165	list.append(line)
				166	l = self.__gamh_cache.get(headerseen)
				167	if not l:
				168	self.__gamh_cache[headerseen] = l = []
				169	l.append(line)
				170	x = string.strip(line[len(headerseen)+1:])
				171	self.dict[headerseen] = x
				172	l = self.__gh_cache.get(headerseen)
				173	if not l:
				174	self.__gh_cache[headerseen] = l = []
				175	l.append(x)
				176	continue
				177	else:
				178	# It's not a header line; throw it back and stop here.
				179	if not self.dict:
				180	self.status = 'No headers'
				181	else:
				182	self.status = 'Non-header line where header expected'
				183	# Try to undo the read.
				184	if unread:
				185	unread(line)
				186	elif tell:
				187	self.fp.seek(startofline)
				188	else:
				189	self.status = self.status + '; bad seek'
				190	break
				191
				192	def isheader(self, line):
				193	"""Determine whether a given line is a legal header.
				194
				195	This method should return the header name, suitably canonicalized.
				196	You may override this method in order to use Message parsing
				197	on tagged data in RFC822-like formats with special header formats.
				198	"""
				199	i = string.find(line, ':')
				200	if i > 0:
				201	return string.lower(line[:i])
				202	else:
				203	return None
				204
				205	def islast(self, line):
				206	"""Determine whether a line is a legal end of RFC-822 headers.
				207
				208	You may override this method if your application wants
				209	to bend the rules, e.g. to strip trailing whitespace,
				210	or to recognise MH template separators ('--------').
				211	For convenience (e.g. for code reading from sockets) a
				212	line consisting of \r\n also matches.
				213	"""
				214	return line in _blanklines
				215
				216	def iscomment(self, line):
				217	"""Determine whether a line should be skipped entirely.
				218
				219	You may override this method in order to use Message parsing
				220	on tagged data in RFC822-like formats that support embedded
				221	comments or free-text data.
				222	"""
				223	return None
				224
				225	def getallmatchingheaders(self, name,
				226	# speed hack:
				227	lower = string.lower):
				228	"""Find all header lines matching a given header name.
				229
				230	Look through the list of headers and find all lines
				231	matching a given header name (and their continuation
				232	lines). A list of the lines is returned, without
				233	interpretation. If the header does not occur, an
				234	empty list is returned. If the header occurs multiple
				235	times, all occurrences are returned. Case is not
				236	important in the header name.
				237	"""
				238	r = self.__gamh_cache.get(lower(name))
				239	if r:
				240	return r[:]
				241	return []
				242
				243	def getfirstmatchingheader(self, name,
				244	# speed hack:
				245	lower = string.lower):
				246	"""Get the first header line matching name.
				247
				248	This is similar to getallmatchingheaders, but it returns
				249	only the first matching header (and its continuation
				250	lines).
				251	"""
				252	l = self.__gamh_cache.get(lower(name))
				253	if not l:
				254	return []
				255	r = []
				256	for item in l:
				257	if r and item[0] not in " \t":
				258	break
				259	r.append(item)
				260	return r
				261
				262	def getrawheader(self, name):
				263	"""A higher-level interface to getfirstmatchingheader().
				264
				265	Return a string containing the literal text of the
				266	header but with the keyword stripped. All leading,
				267	trailing and embedded whitespace is kept in the
				268	string, however.
				269	Return None if the header does not occur.
				270	"""
				271
				272	list = self.getfirstmatchingheader(name)
				273	if not list:
				274	return None
				275	list[0] = list[0][len(name) + 1:]
				276	return string.joinfields(list, '')
				277
				278	def getheader(self, name, default=None):
				279	"""Get the header value for a name.
				280
				281	This is the normal interface: it returns a stripped
				282	version of the header value for a given header name,
				283	or None if it doesn't exist. This uses the dictionary
				284	version which finds the last such header.
				285	"""
				286	try:
				287	return self.dict[string.lower(name)]
				288	except KeyError:
				289	return default
				290	get = getheader
				291
				292	def getheaders(self, name,
				293	# speed hack:
				294	lower = string.lower):
				295	"""Get all values for a header.
				296
				297	This returns a list of values for headers given more than once;
				298	each value in the result list is stripped in the same way as the
				299	result of getheader(). If the header is not given, return an
				300	empty list.
				301	"""
				302	r = self.__gh_cache.get(lower(name))
				303	if r:
				304	return r[:]
				305	return []
				306
				307	def getaddr(self, name):
				308	"""Get a single address from a header, as a tuple.
				309
				310	An example return value:
				311	('Guido van Rossum', 'guido@cwi.nl')
				312	"""
				313	# New, by Ben Escoto
				314	alist = self.getaddrlist(name)
				315	if alist:
				316	return alist[0]
				317	else:
				318	return (None, None)
				319
				320	def getaddrlist(self, name):
				321	"""Get a list of addresses from a header.
				322
				323	Retrieves a list of addresses from a header, where each address is a
				324	tuple as returned by getaddr(). Scans all named headers, so it works
				325	properly with multiple To: or Cc: headers for example.
				326
				327	"""
				328	raw = []
				329	for h in self.getallmatchingheaders(name):
				330	if h[0] in ' \t':
				331	raw.append(h)
				332	else:
				333	if raw:
				334	raw.append(', ')
				335	i = string.find(h, ':')
				336	if i > 0:
				337	addr = h[i+1:]
				338	raw.append(addr)
				339	alladdrs = string.join(raw, '')
				340	a = AddrlistClass(alladdrs)
				341	return a.getaddrlist()
				342
				343	def getdate(self, name):
				344	"""Retrieve a date field from a header.
				345
				346	Retrieves a date field from the named header, returning
				347	a tuple compatible with time.mktime().
				348	"""
				349	try:
				350	data = self[name]
				351	except KeyError:
				352	return None
				353	return parsedate(data)
				354
				355	def getdate_tz(self, name):
				356	"""Retrieve a date field from a header as a 10-tuple.
				357
				358	The first 9 elements make up a tuple compatible with
				359	time.mktime(), and the 10th is the offset of the poster's
				360	time zone from GMT/UTC.
				361	"""
				362	try:
				363	data = self[name]
				364	except KeyError:
				365	return None
				366	return parsedate_tz(data)
				367
				368
				369	# Access as a dictionary (only finds last header of each type):
				370
				371	def __len__(self):
				372	"""Get the number of headers in a message."""
				373	return len(self.dict)
				374
				375	def __getitem__(self, name):
				376	"""Get a specific header, as from a dictionary."""
				377	return self.dict[string.lower(name)]
				378
				379	def __setitem__(self, name, value):
				380	"""Set the value of a header.
				381
				382	Note: This is not a perfect inversion of __getitem__, because
				383	any changed headers get stuck at the end of the raw-headers list
				384	rather than where the altered header was.
				385	"""
				386	del self[name] # Won't fail if it doesn't exist
				387	self.dict[string.lower(name)] = value
				388	text = name + ": " + value
				389	lines = string.split(text, "\n")
				390	for line in lines:
				391	self.headers.append(line + "\n")
				392
				393	def __delitem__(self, name):
				394	"""Delete all occurrences of a specific header, if it is present."""
				395	name = string.lower(name)
				396	if not self.dict.has_key(name):
				397	return
				398	del self.dict[name]
				399	name = name + ':'
				400	n = len(name)
				401	list = []
				402	hit = 0
				403	for i in range(len(self.headers)):
				404	line = self.headers[i]
				405	if string.lower(line[:n]) == name:
				406	hit = 1
				407	elif line[:1] not in string.whitespace:
				408	hit = 0
				409	if hit:
				410	list.append(i)
				411	list.reverse()
				412	for i in list:
				413	del self.headers[i]
				414
				415	def has_key(self, name):
				416	"""Determine whether a message contains the named header."""
				417	return self.dict.has_key(string.lower(name))
				418
				419	def keys(self):
				420	"""Get all of a message's header field names."""
				421	return self.dict.keys()
				422
				423	def values(self):
				424	"""Get all of a message's header field values."""
				425	return self.dict.values()
				426
				427	def items(self):
				428	"""Get all of a message's headers.
				429
				430	Returns a list of name, value tuples.
				431	"""
				432	return self.dict.items()
				433
				434	def __str__(self):
				435	str = ''
				436	for hdr in self.headers:
				437	str = str + hdr
				438	return str
				439
				440
				441	# Utility functions
				442	# -----------------
				443
				444	# XXX Should fix unquote() and quote() to be really conformant.
				445	# XXX The inverses of the parse functions may also be useful.
				446
				447
				448	def unquote(str):
				449	"""Remove quotes from a string."""
				450	if len(str) > 1:
				451	if str[0] == '"' and str[-1:] == '"':
				452	return str[1:-1]
				453	if str[0] == '<' and str[-1:] == '>':
				454	return str[1:-1]
				455	return str
				456
				457
				458	def quote(str):
				459	"""Add quotes around a string."""
				460	return '"%s"' % string.join(
				461	string.split(
				462	string.join(
				463	string.split(str, '\\'),
				464	'\\\\'),
				465	'"'),
				466	'\\"')
				467
				468
				469	def parseaddr(address):
				470	"""Parse an address into a (realname, mailaddr) tuple."""
				471	a = AddrlistClass(address)
				472	list = a.getaddrlist()
				473	if not list:
				474	return (None, None)
				475	else:
				476	return list[0]
				477
				478
				479	class AddrlistClass:
				480	"""Address parser class by Ben Escoto.
				481
				482	To understand what this class does, it helps to have a copy of
				483	RFC-822 in front of you.
				484
				485	Note: this class interface is deprecated and may be removed in the future.
				486	Use rfc822.AddressList instead.
				487	"""
				488
				489	def __init__(self, field):
				490	"""Initialize a new instance.
				491
				492	`field' is an unparsed address header field, containing
				493	one or more addresses.
				494	"""
				495	self.specials = '()<>@,:;.\"[]'
				496	self.pos = 0
				497	self.LWS = ' \t'
				498	self.CR = '\r\n'
				499	self.atomends = self.specials + self.LWS + self.CR
				500	self.field = field
				501	self.commentlist = []
				502
				503	def gotonext(self):
				504	"""Parse up to the start of the next address."""
				505	while self.pos < len(self.field):
				506	if self.field[self.pos] in self.LWS + '\n\r':
				507	self.pos = self.pos + 1
				508	elif self.field[self.pos] == '(':
				509	self.commentlist.append(self.getcomment())
				510	else: break
				511
				512	def getaddrlist(self):
				513	"""Parse all addresses.
				514
				515	Returns a list containing all of the addresses.
				516	"""
				517	ad = self.getaddress()
				518	if ad:
				519	return ad + self.getaddrlist()
				520	else: return []
				521
				522	def getaddress(self):
				523	"""Parse the next address."""
				524	self.commentlist = []
				525	self.gotonext()
				526
				527	oldpos = self.pos
				528	oldcl = self.commentlist
				529	plist = self.getphraselist()
				530
				531	self.gotonext()
				532	returnlist = []
				533
				534	if self.pos >= len(self.field):
				535	# Bad email address technically, no domain.
				536	if plist:
				537	returnlist = [(string.join(self.commentlist), plist[0])]
				538
				539	elif self.field[self.pos] in '.@':
				540	# email address is just an addrspec
				541	# this isn't very efficient since we start over
				542	self.pos = oldpos
				543	self.commentlist = oldcl
				544	addrspec = self.getaddrspec()
				545	returnlist = [(string.join(self.commentlist), addrspec)]
				546
				547	elif self.field[self.pos] == ':':
				548	# address is a group
				549	returnlist = []
				550
				551	fieldlen = len(self.field)
				552	self.pos = self.pos + 1
				553	while self.pos < len(self.field):
				554	self.gotonext()
				555	if self.pos < fieldlen and self.field[self.pos] == ';':
				556	self.pos = self.pos + 1
				557	break
				558	returnlist = returnlist + self.getaddress()
				559
				560	elif self.field[self.pos] == '<':
				561	# Address is a phrase then a route addr
				562	routeaddr = self.getrouteaddr()
				563
				564	if self.commentlist:
				565	returnlist = [(string.join(plist) + ' (' + \
				566	string.join(self.commentlist) + ')', routeaddr)]
				567	else: returnlist = [(string.join(plist), routeaddr)]
				568
				569	else:
				570	if plist:
				571	returnlist = [(string.join(self.commentlist), plist[0])]
				572	elif self.field[self.pos] in self.specials:
				573	self.pos = self.pos + 1
				574
				575	self.gotonext()
				576	if self.pos < len(self.field) and self.field[self.pos] == ',':
				577	self.pos = self.pos + 1
				578	return returnlist
				579
				580	def getrouteaddr(self):
				581	"""Parse a route address (Return-path value).
				582
				583	This method just skips all the route stuff and returns the addrspec.
				584	"""
				585	if self.field[self.pos] != '<':
				586	return
				587
				588	expectroute = 0
				589	self.pos = self.pos + 1
				590	self.gotonext()
				591	adlist = None
				592	while self.pos < len(self.field):
				593	if expectroute:
				594	self.getdomain()
				595	expectroute = 0
				596	elif self.field[self.pos] == '>':
				597	self.pos = self.pos + 1
				598	break
				599	elif self.field[self.pos] == '@':
				600	self.pos = self.pos + 1
				601	expectroute = 1
				602	elif self.field[self.pos] == ':':
				603	self.pos = self.pos + 1
				604	expectaddrspec = 1
				605	else:
				606	adlist = self.getaddrspec()
				607	self.pos = self.pos + 1
				608	break
				609	self.gotonext()
				610
				611	return adlist
				612
				613	def getaddrspec(self):
				614	"""Parse an RFC-822 addr-spec."""
				615	aslist = []
				616
				617	self.gotonext()
				618	while self.pos < len(self.field):
				619	if self.field[self.pos] == '.':
				620	aslist.append('.')
				621	self.pos = self.pos + 1
				622	elif self.field[self.pos] == '"':
				623	aslist.append('"%s"' % self.getquote())
				624	elif self.field[self.pos] in self.atomends:
				625	break
				626	else: aslist.append(self.getatom())
				627	self.gotonext()
				628
				629	if self.pos >= len(self.field) or self.field[self.pos] != '@':
				630	return string.join(aslist, '')
				631
				632	aslist.append('@')
				633	self.pos = self.pos + 1
				634	self.gotonext()
				635	return string.join(aslist, '') + self.getdomain()
				636
				637	def getdomain(self):
				638	"""Get the complete domain name from an address."""
				639	sdlist = []
				640	while self.pos < len(self.field):
				641	if self.field[self.pos] in self.LWS:
				642	self.pos = self.pos + 1
				643	elif self.field[self.pos] == '(':
				644	self.commentlist.append(self.getcomment())
				645	elif self.field[self.pos] == '[':
				646	sdlist.append(self.getdomainliteral())
				647	elif self.field[self.pos] == '.':
				648	self.pos = self.pos + 1
				649	sdlist.append('.')
				650	elif self.field[self.pos] in self.atomends:
				651	break
				652	else: sdlist.append(self.getatom())
				653	return string.join(sdlist, '')
				654
				655	def getdelimited(self, beginchar, endchars, allowcomments = 1):
				656	"""Parse a header fragment delimited by special characters.
				657
				658	`beginchar' is the start character for the fragment.
				659	If self is not looking at an instance of `beginchar' then
				660	getdelimited returns the empty string.
				661
				662	`endchars' is a sequence of allowable end-delimiting characters.
				663	Parsing stops when one of these is encountered.
				664
				665	If `allowcomments' is non-zero, embedded RFC-822 comments
				666	are allowed within the parsed fragment.
				667	"""
				668	if self.field[self.pos] != beginchar:
				669	return ''
				670
				671	slist = ['']
				672	quote = 0
				673	self.pos = self.pos + 1
				674	while self.pos < len(self.field):
				675	if quote == 1:
				676	slist.append(self.field[self.pos])
				677	quote = 0
				678	elif self.field[self.pos] in endchars:
				679	self.pos = self.pos + 1
				680	break
				681	elif allowcomments and self.field[self.pos] == '(':
				682	slist.append(self.getcomment())
				683	elif self.field[self.pos] == '\\':
				684	quote = 1
				685	else:
				686	slist.append(self.field[self.pos])
				687	self.pos = self.pos + 1
				688
				689	return string.join(slist, '')
				690
				691	def getquote(self):
				692	"""Get a quote-delimited fragment from self's field."""
				693	return self.getdelimited('"', '"\r', 0)
				694
				695	def getcomment(self):
				696	"""Get a parenthesis-delimited fragment from self's field."""
				697	return self.getdelimited('(', ')\r', 1)
				698
				699	def getdomainliteral(self):
				700	"""Parse an RFC-822 domain-literal."""
				701	return self.getdelimited('[', ']\r', 0)
				702
				703	def getatom(self):
				704	"""Parse an RFC-822 atom."""
				705	atomlist = ['']
				706
				707	while self.pos < len(self.field):
				708	if self.field[self.pos] in self.atomends:
				709	break
				710	else: atomlist.append(self.field[self.pos])
				711	self.pos = self.pos + 1
				712
				713	return string.join(atomlist, '')
				714
				715	def getphraselist(self):
				716	"""Parse a sequence of RFC-822 phrases.
				717
				718	A phrase is a sequence of words, which are in turn either
				719	RFC-822 atoms or quoted-strings. Phrases are canonicalized
				720	by squeezing all runs of continuous whitespace into one space.
				721	"""
				722	plist = []
				723
				724	while self.pos < len(self.field):
				725	if self.field[self.pos] in self.LWS:
				726	self.pos = self.pos + 1
				727	elif self.field[self.pos] == '"':
				728	plist.append(self.getquote())
				729	elif self.field[self.pos] == '(':
				730	self.commentlist.append(self.getcomment())
				731	elif self.field[self.pos] in self.atomends:
				732	break
				733	else: plist.append(self.getatom())
				734
				735	return plist
				736
				737	class AddressList(AddrlistClass):
				738	"""An AddressList encapsulates a list of parsed RFC822 addresses."""
				739	def __init__(self, field):
				740	AddrlistClass.__init__(self, field)
				741	if field:
				742	self.addresslist = self.getaddrlist()
				743	else:
				744	self.addresslist = []
				745
				746	def __len__(self):
				747	return len(self.addresslist)
				748
				749	def __str__(self):
				750	return string.joinfields(map(dump_address_pair, self.addresslist),", ")
				751
				752	def __add__(self, other):
				753	# Set union
				754	newaddr = AddressList(None)
				755	newaddr.addresslist = self.addresslist[:]
				756	for x in other.addresslist:
				757	if not x in self.addresslist:
				758	newaddr.addresslist.append(x)
				759	return newaddr
				760
				761	def __sub__(self, other):
				762	# Set difference
				763	newaddr = AddressList(None)
				764	for x in self.addresslist:
				765	if not x in other.addresslist:
				766	newaddr.addresslist.append(x)
				767	return newaddr
				768
				769	def __getitem__(self, index):
				770	# Make indexing, slices, and 'in' work
				771	return self.addresslist[index]
				772
				773	def dump_address_pair(pair):
				774	"""Dump a (name, address) pair in a canonicalized form."""
				775	if pair[0]:
				776	return '"' + pair[0] + '" <' + pair[1] + '>'
				777	else:
				778	return pair[1]
				779
				780	# Parse a date field
				781
				782	_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
				783	'aug', 'sep', 'oct', 'nov', 'dec',
				784	'january', 'february', 'march', 'april', 'may', 'june', 'july',
				785	'august', 'september', 'october', 'november', 'december']
				786	_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
				787
				788	# The timezone table does not include the military time zones defined
				789	# in RFC822, other than Z. According to RFC1123, the description in
				790	# RFC822 gets the signs wrong, so we can't rely on any such time
				791	# zones. RFC1123 recommends that numeric timezone indicators be used
				792	# instead of timezone names.
				793
				794	_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
				795	'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
				796	'EST': -500, 'EDT': -400, # Eastern
				797	'CST': -600, 'CDT': -500, # Central
				798	'MST': -700, 'MDT': -600, # Mountain
				799	'PST': -800, 'PDT': -700 # Pacific
				800	}
				801
				802
				803	def parsedate_tz(data):
				804	"""Convert a date string to a time tuple.
				805
				806	Accounts for military timezones.
				807	"""
				808	data = string.split(data)
				809	if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
				810	# There's a dayname here. Skip it
				811	del data[0]
				812	if len(data) == 3: # RFC 850 date, deprecated
				813	stuff = string.split(data[0], '-')
				814	if len(stuff) == 3:
				815	data = stuff + data[1:]
				816	if len(data) == 4:
				817	s = data[3]
				818	i = string.find(s, '+')
				819	if i > 0:
				820	data[3:] = [s[:i], s[i+1:]]
				821	else:
				822	data.append('') # Dummy tz
				823	if len(data) < 5:
				824	return None
				825	data = data[:5]
				826	[dd, mm, yy, tm, tz] = data
				827	mm = string.lower(mm)
				828	if not mm in _monthnames:
				829	dd, mm = mm, string.lower(dd)
				830	if not mm in _monthnames:
				831	return None
				832	mm = _monthnames.index(mm)+1
				833	if mm > 12: mm = mm - 12
				834	if dd[-1] == ',':
				835	dd = dd[:-1]
				836	i = string.find(yy, ':')
				837	if i > 0:
				838	yy, tm = tm, yy
				839	if yy[-1] == ',':
				840	yy = yy[:-1]
				841	if yy[0] not in string.digits:
				842	yy, tz = tz, yy
				843	if tm[-1] == ',':
				844	tm = tm[:-1]
				845	tm = string.splitfields(tm, ':')
				846	if len(tm) == 2:
				847	[thh, tmm] = tm
				848	tss = '0'
				849	elif len(tm) == 3:
				850	[thh, tmm, tss] = tm
				851	else:
				852	return None
				853	try:
				854	yy = string.atoi(yy)
				855	dd = string.atoi(dd)
				856	thh = string.atoi(thh)
				857	tmm = string.atoi(tmm)
				858	tss = string.atoi(tss)
				859	except string.atoi_error:
				860	return None
				861	tzoffset=None
				862	tz=string.upper(tz)
				863	if _timezones.has_key(tz):
				864	tzoffset=_timezones[tz]
				865	else:
				866	try:
				867	tzoffset=string.atoi(tz)
				868	except string.atoi_error:
				869	pass
				870	# Convert a timezone offset into seconds ; -0500 -> -18000
				871	if tzoffset:
				872	if tzoffset < 0:
				873	tzsign = -1
				874	tzoffset = -tzoffset
				875	else:
				876	tzsign = 1
				877	tzoffset = tzsign * ( (tzoffset/100)3600 + (tzoffset % 100)60)
				878	tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
				879	return tuple
				880
				881
				882	def parsedate(data):
				883	"""Convert a time string to a time tuple."""
				884	t=parsedate_tz(data)
				885	if type(t)==type( () ):
				886	return t[:9]
				887	else: return t
				888
				889
				890	def mktime_tz(data):
				891	"""Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
				892	if data[9] is None:
				893	# No zone info, so localtime is better assumption than GMT
				894	return time.mktime(data[:8] + (-1,))
				895	else:
				896	t = time.mktime(data[:8] + (0,))
				897	return t - data[9] - time.timezone
				898
				899	def formatdate(timeval=None):
				900	"""Returns time format preferred for Internet standards.
				901
				902	Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
				903	"""
				904	if timeval is None:
				905	timeval = time.time()
				906	return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
				907	time.gmtime(timeval))
				908
				909
				910	# When used as script, run a small test program.
				911	# The first command line argument must be a filename containing one
				912	# message in RFC-822 format.
				913
				914	if __name__ == '__main__':
				915	import sys, os
				916	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
				917	if sys.argv[1:]: file = sys.argv[1]
				918	f = open(file, 'r')
				919	m = Message(f)
				920	print 'From:', m.getaddr('from')
				921	print 'To:', m.getaddrlist('to')
				922	print 'Subject:', m.getheader('subject')
				923	print 'Date:', m.getheader('date')
				924	date = m.getdate_tz('date')
				925	if date:
				926	print 'ParsedDate:', time.asctime(date[:-1]),
				927	hhmmss = date[-1]
				928	hhmm, ss = divmod(hhmmss, 60)
				929	hh, mm = divmod(hhmm, 60)
				930	print "%+03d%02d" % (hh, mm),
				931	if ss: print ".%02d" % ss,
				932	print
				933	else:
				934	print 'ParsedDate:', None
				935	m.rewindbody()
				936	n = 0
				937	while f.readline():
				938	n = n + 1
				939	print 'Lines:', n
				940	print '-'*70
				941	print 'len =', len(m)
				942	if m.has_key('Date'): print 'Date =', m['Date']
				943	if m.has_key('X-Nonsense'): pass
				944	print 'keys =', m.keys()
				945	print 'values =', m.values()
				946	print 'items =', m.items()