Blame - Lib/rfc822.py - platform/external/python/cpython2

blob: 63f2fb61f0d43664c774650da809ef473505f7cf [file] [log] [blame]

Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame^]	1	# RFC-822 message manipulation class.
				2	#
				3	# XXX This is only a very rough sketch of a full RFC-822 parser;
				4	# additional methods are needed to parse addresses and dates, and to
				5	# tokenize lines according to various other syntax rules.
				6	#
				7	# Directions for use:
				8	#
				9	# To create a Message object: first open a file, e.g.:
				10	# fp = open(file, 'r')
				11	# (or use any other legal way of getting an open file object, e.g. use
				12	# sys.stdin or call os.popen()).
				13	# Then pass the open file object to the init() method of Message:
				14	# m = Message().init(fp)
				15	#
				16	# To get the text of a particular header there are several methods:
				17	# str = m.getheader(name)
				18	# str = m.getrawheader(name)
				19	# where name is the name of the header, e.g. 'Subject'.
				20	# The difference is that getheader() strips the leading and trailing
				21	# whitespace, while getrawheader() doesn't. Both functions retain
				22	# embedded whitespace (including newlines) exactly as they are
				23	# specified in the header, and leave the case of the text unchanged.
				24	#
				25	# See the class definition for lower level access methods.
				26	#
				27	# There are also some utility functions here.
				28
				29
				30	import regex
				31	import string
				32
				33
				34	class Message:
				35
				36	# Initialize the class instance and read the headers.
				37
				38	def init(self, fp):
				39	self.fp = fp
				40	#
				41	try:
				42	self.startofheaders = self.fp.tell()
				43	except IOError:
				44	self.startofheaders = None
				45	#
				46	self.readheaders()
				47	#
				48	try:
				49	self.startofbody = self.fp.tell()
				50	except IOError:
				51	self.startofbody = None
				52	#
				53	return self
				54
				55
				56	# Rewind the file to the start of the body (if seekable).
				57
				58	def rewindbody(self):
				59	self.fp.seek(self.startofbody)
				60
				61
				62	# Read header lines up to the entirely blank line that
				63	# terminates them. The (normally blank) line that ends the
				64	# headers is skipped, but not included in the returned list.
				65	# If a non-header line ends the headers, (which is an error),
				66	# an attempt is made to backspace over it; it is never
				67	# included in the returned list.
				68	#
				69	# The variable self.status is set to the empty string if all
				70	# went well, otherwise it is an error message.
				71	# The variable self.headers is a completely uninterpreted list
				72	# of lines contained in the header (so printing them will
				73	# reproduce the header exactly as it appears in the file).
				74
				75	def readheaders(self):
				76	self.headers = list = []
				77	self.status = ''
				78	headerseen = 0
				79	while 1:
				80	line = self.fp.readline()
				81	if not line:
				82	self.status = 'EOF in headers'
				83	break
				84	if self.islast(line):
				85	break
				86	elif headerseen and line[0] in ' \t':
				87	# It's a continuation line.
				88	list.append(line)
				89	elif regex.match('^[!-9;-~]+:', line):
				90	# It's a header line.
				91	list.append(line)
				92	headerseen = 1
				93	else:
				94	# It's not a header line; stop here.
				95	if not headerseen:
				96	self.status = 'No headers'
				97	else:
				98	self.status = 'Bad header'
				99	# Try to undo the read.
				100	try:
				101	self.fp.seek(-len(line), 1)
				102	except IOError:
				103	self.status = \
				104	self.status + '; bad seek'
				105	break
				106
				107
				108	# Method to determine whether a line is a legal end of
				109	# RFC-822 headers. You may override this method if your
				110	# application wants to bend the rules, e.g. to accept lines
				111	# ending in '\r\n', to strip trailing whitespace, or to
				112	# recognise MH template separators ('--------').
				113
				114	def islast(self, line):
				115	return line == '\n'
				116
				117
				118	# Look through the list of headers and find all lines matching
				119	# a given header name (and their continuation lines).
				120	# A list of the lines is returned, without interpretation.
				121	# If the header does not occur, an empty list is returned.
				122	# If the header occurs multiple times, all occurrences are
				123	# returned. Case is not important in the header name.
				124
				125	def getallmatchingheaders(self, name):
				126	name = string.lower(name) + ':'
				127	n = len(name)
				128	list = []
				129	hit = 0
				130	for line in self.headers:
				131	if string.lower(line[:n]) == name:
				132	hit = 1
				133	elif line[:1] not in string.whitespace:
				134	hit = 0
				135	if hit:
				136	list.append(line)
				137	return list
				138
				139
				140	# Similar, but return only the first matching header (and its
				141	# continuation lines).
				142
				143	def getfirstmatchingheader(self, name):
				144	name = string.lower(name) + ':'
				145	n = len(name)
				146	list = []
				147	hit = 0
				148	for line in self.headers:
				149	if string.lower(line[:n]) == name:
				150	hit = 1
				151	elif line[:1] not in string.whitespace:
				152	if hit:
				153	break
				154	if hit:
				155	list.append(line)
				156	return list
				157
				158
				159	# A higher-level interface to getfirstmatchingheader().
				160	# Return a string containing the literal text of the header
				161	# but with the keyword stripped. All leading, trailing and
				162	# embedded whitespace is kept in the string, however.
				163	# Return None if the header does not occur.
				164
				165	def getrawheader(self, name):
				166	list = self.getfirstmatchingheader(name)
				167	if not list:
				168	return None
				169	list[0] = list[0][len(name) + 1:]
				170	return string.joinfields(list, '')
				171
				172
				173	# Going one step further: also strip leading and trailing
				174	# whitespace.
				175
				176	def getheader(self, name):
				177	text = self.getrawheader(name)
				178	if text == None:
				179	return None
				180	return string.strip(text)
				181
				182
				183	# XXX The next step would be to define self.getaddr(name)
				184	# and self.getaddrlist(name) which would parse a header
				185	# consisting of a single mail address and a number of mail
				186	# addresses, respectively. Lower level functions would be
				187	# parseaddr(string) and parseaddrlist(string).
				188
				189	# XXX Similarly, there would be a function self.getdate(name) to
				190	# return a date in canonical form (perhaps a number compatible
				191	# with time.time()) and a function parsedate(string).
				192
				193	# XXX The inverses of the parse functions may also be useful.
				194
				195
				196
				197
				198	# Utility functions
				199	# -----------------
				200
				201
				202	# Remove quotes from a string.
				203	# XXX Should fix this to be really conformant.
				204
				205	def unquote(str):
				206	if len(str) > 1:
				207	if str[0] == '"' and str[-1:] == '"':
				208	return str[1:-1]
				209	if str[0] == '<' and str[-1:] == '>':
				210	return str[1:-1]
				211	return str