Blame - Lib/rfc822.py - platform/external/python/cpython3

blob: 39ab6a608c4b8c745d016995383675d6459f6b19 [file] [log] [blame]

Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	1	# RFC-822 message manipulation class.
				2	#
				3	# XXX This is only a very rough sketch of a full RFC-822 parser;
				4	# additional methods are needed to parse addresses and dates, and to
				5	# tokenize lines according to various other syntax rules.
				6	#
				7	# Directions for use:
				8	#
				9	# To create a Message object: first open a file, e.g.:
				10	# fp = open(file, 'r')
				11	# (or use any other legal way of getting an open file object, e.g. use
				12	# sys.stdin or call os.popen()).
Guido van Rossum	7bc817d	1993-12-17 15:25:27 +0000	[diff] [blame]	13	# Then pass the open file object to the Message() constructor:
				14	# m = Message(fp)
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	15	#
				16	# To get the text of a particular header there are several methods:
				17	# str = m.getheader(name)
				18	# str = m.getrawheader(name)
				19	# where name is the name of the header, e.g. 'Subject'.
				20	# The difference is that getheader() strips the leading and trailing
				21	# whitespace, while getrawheader() doesn't. Both functions retain
				22	# embedded whitespace (including newlines) exactly as they are
				23	# specified in the header, and leave the case of the text unchanged.
				24	#
				25	# See the class definition for lower level access methods.
				26	#
				27	# There are also some utility functions here.
				28
				29
				30	import regex
				31	import string
				32
				33
				34	class Message:
				35
				36	# Initialize the class instance and read the headers.
				37
Guido van Rossum	7bc817d	1993-12-17 15:25:27 +0000	[diff] [blame]	38	def __init__(self, fp):
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	39	self.fp = fp
				40	#
				41	try:
				42	self.startofheaders = self.fp.tell()
				43	except IOError:
				44	self.startofheaders = None
				45	#
				46	self.readheaders()
				47	#
				48	try:
				49	self.startofbody = self.fp.tell()
				50	except IOError:
				51	self.startofbody = None
Guido van Rossum	01ca336	1992-07-13 14:28:59 +0000	[diff] [blame]	52
				53
				54	# Rewind the file to the start of the body (if seekable).
				55
				56	def rewindbody(self):
				57	self.fp.seek(self.startofbody)
				58
				59
				60	# Read header lines up to the entirely blank line that
				61	# terminates them. The (normally blank) line that ends the
				62	# headers is skipped, but not included in the returned list.
				63	# If a non-header line ends the headers, (which is an error),
				64	# an attempt is made to backspace over it; it is never
				65	# included in the returned list.
				66	#
				67	# The variable self.status is set to the empty string if all
				68	# went well, otherwise it is an error message.
				69	# The variable self.headers is a completely uninterpreted list
				70	# of lines contained in the header (so printing them will
				71	# reproduce the header exactly as it appears in the file).
				72
				73	def readheaders(self):
				74	self.headers = list = []
				75	self.status = ''
				76	headerseen = 0
				77	while 1:
				78	line = self.fp.readline()
				79	if not line:
				80	self.status = 'EOF in headers'
				81	break
				82	if self.islast(line):
				83	break
				84	elif headerseen and line[0] in ' \t':
				85	# It's a continuation line.
				86	list.append(line)
				87	elif regex.match('^[!-9;-~]+:', line):
				88	# It's a header line.
				89	list.append(line)
				90	headerseen = 1
				91	else:
				92	# It's not a header line; stop here.
				93	if not headerseen:
				94	self.status = 'No headers'
				95	else:
				96	self.status = 'Bad header'
				97	# Try to undo the read.
				98	try:
				99	self.fp.seek(-len(line), 1)
				100	except IOError:
				101	self.status = \
				102	self.status + '; bad seek'
				103	break
				104
				105
				106	# Method to determine whether a line is a legal end of
				107	# RFC-822 headers. You may override this method if your
				108	# application wants to bend the rules, e.g. to accept lines
				109	# ending in '\r\n', to strip trailing whitespace, or to
				110	# recognise MH template separators ('--------').
				111
				112	def islast(self, line):
				113	return line == '\n'
				114
				115
				116	# Look through the list of headers and find all lines matching
				117	# a given header name (and their continuation lines).
				118	# A list of the lines is returned, without interpretation.
				119	# If the header does not occur, an empty list is returned.
				120	# If the header occurs multiple times, all occurrences are
				121	# returned. Case is not important in the header name.
				122
				123	def getallmatchingheaders(self, name):
				124	name = string.lower(name) + ':'
				125	n = len(name)
				126	list = []
				127	hit = 0
				128	for line in self.headers:
				129	if string.lower(line[:n]) == name:
				130	hit = 1
				131	elif line[:1] not in string.whitespace:
				132	hit = 0
				133	if hit:
				134	list.append(line)
				135	return list
				136
				137
				138	# Similar, but return only the first matching header (and its
				139	# continuation lines).
				140
				141	def getfirstmatchingheader(self, name):
				142	name = string.lower(name) + ':'
				143	n = len(name)
				144	list = []
				145	hit = 0
				146	for line in self.headers:
				147	if string.lower(line[:n]) == name:
				148	hit = 1
				149	elif line[:1] not in string.whitespace:
				150	if hit:
				151	break
				152	if hit:
				153	list.append(line)
				154	return list
				155
				156
				157	# A higher-level interface to getfirstmatchingheader().
				158	# Return a string containing the literal text of the header
				159	# but with the keyword stripped. All leading, trailing and
				160	# embedded whitespace is kept in the string, however.
				161	# Return None if the header does not occur.
				162
				163	def getrawheader(self, name):
				164	list = self.getfirstmatchingheader(name)
				165	if not list:
				166	return None
				167	list[0] = list[0][len(name) + 1:]
				168	return string.joinfields(list, '')
				169
				170
				171	# Going one step further: also strip leading and trailing
				172	# whitespace.
				173
				174	def getheader(self, name):
				175	text = self.getrawheader(name)
				176	if text == None:
				177	return None
				178	return string.strip(text)
				179
				180
				181	# XXX The next step would be to define self.getaddr(name)
				182	# and self.getaddrlist(name) which would parse a header
				183	# consisting of a single mail address and a number of mail
				184	# addresses, respectively. Lower level functions would be
				185	# parseaddr(string) and parseaddrlist(string).
				186
				187	# XXX Similarly, there would be a function self.getdate(name) to
				188	# return a date in canonical form (perhaps a number compatible
				189	# to time.time()) and a function parsedate(string).
				190
				191	# XXX The inverses of the parse functions may also be useful.
				192
				193
				194
				195
				196	# Utility functions
				197	# -----------------
				198
				199
				200	# Remove quotes from a string.
				201	# XXX Should fix this to be really conformant.
				202
				203	def unquote(str):
				204	if len(str) > 1:
				205	if str[0] == '"' and str[-1:] == '"':
				206	return str[1:-1]
				207	if str[0] == '<' and str[-1:] == '>':
				208	return str[1:-1]
				209	return str