blob: 18277d6dff4dbe080e9902d65e961623b1f8e354 [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
6Note: RFC 2822 is a long awaited update to RFC 822. This module should
7conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
9performed. Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000033The optional `seekable' argument is provided as a workaround for certain stdio
34libraries in which tell() discards buffered data before discovering that the
35lseek() system call doesn't work. For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
38it is 1 on entry -- which it is by default -- the tell() method of the open
39file object is called once; if this raises an exception, seekable is reset to
400. For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
Skip Montanaro0de65802001-02-15 22:15:14 +000076__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
Guido van Rossum01ca3361992-07-13 14:28:59 +000077
Guido van Rossum9ab94c11997-12-10 16:17:39 +000078_blanklines = ('\r\n', '\n') # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000079
80
class Message:
    """Represents a single RFC 2822-compliant message.

    The constructor reads the header block from an open file-like object
    and leaves the stream positioned at the start of the body.

    Instance attributes set by this class:

    fp              -- the input object passed to the constructor
    seekable        -- true while tell()/seek() are believed to work on fp
    startofheaders  -- fp.tell() before the headers were read, or None
    startofbody     -- fp.tell() after the headers were read, or None
    headers         -- list of raw header lines, exactly as read
    dict            -- maps lower-cased header name to its stripped value
                       (the *last* occurrence wins)
    unixfrom        -- any leading Unix "From " envelope line(s)
    status          -- '' if the headers parsed cleanly, else a message
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  If seekable is exactly
        1 (the default), fp.tell() is called once as a probe and seekable
        is reset to 0 if that raises AttributeError or IOError.  For any
        other true value the probe is skipped.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
            else:
                seekable = 1
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit push-back method; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                # Remember where this line starts so it can be un-read.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                joined = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = joined.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen) + 1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        The default implementation returns the lower-cased text before the
        first colon, or None if there is no colon or the line starts with one.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        # Literal tuple keeps the class self-contained; same members as the
        # module-level _blanklines constant.
        return line in ('\r\n', '\n')

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; stop collecting.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    # End of the first match's continuation lines.
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        matches = self.getfirstmatchingheader(name)
        if not matches:
            return None
        # Drop "<name>:" from the first line; keep everything after it.
        matches[0] = matches[0][len(name) + 1:]
        return ''.join(matches)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[:1].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                # A new occurrence starts; flush the previous one first.
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if getaddrlist() finds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate successive occurrences of the header with a comma
                # so they parse as one address list.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    raw.append(h[i + 1:])
        alladdrs = ''.join(raw)
        return AddressList(alladdrs).addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is
        absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name]  # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so earlier indexes stay valid; self.headers is
        # mutated in place.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of a header, adding it first if it is missing.

        If the header is absent, it is appended to the raw header list and
        stored in the dictionary with the value `default', which is then
        returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        text = name + ": " + default
        for line in text.split("\n"):
            self.headers.append(line + "\n")
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names, as a list."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values, as a list."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000467
468
469# Utility functions
470# -----------------
471
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000472# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000473# XXX The inverses of the parse functions may also be useful.
474
Guido van Rossum01ca3361992-07-13 14:28:59 +0000475
def unquote(str):
    """Remove quotes from a string.

    If the string is wrapped in double quotes, strip them and undo the
    backslash escaping of backslashes and double quotes.  If it is wrapped
    in angle brackets, strip those.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    # NOTE: the parameter name shadows the builtin but is part of the
    # public signature, so it is kept for keyword-argument callers.
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000484
485
def quote(str):
    """Escape a string for use inside a double-quoted string.

    Backslashes and double quotes are each preceded by a backslash.  The
    surrounding double quotes themselves are NOT added; the caller must
    wrap the result if a complete quoted string is wanted.
    """
    # Backslashes must be escaped first so the backslashes introduced for
    # the double quotes are not doubled again.
    return str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000489
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000490
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    The string is handed to AddressList and the first parsed address is
    returned.  Returns (None, None) if no address could be parsed.
    """
    addresses = AddressList(address).addresslist
    if not addresses:
        return (None, None)
    return addresses[0]
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000499
500
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000501class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000503
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000504 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000505 RFC 2822 in front of you.
506
507 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000508
509 Note: this class interface is deprecated and may be removed in the future.
510 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000511 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000512
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000513 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000514 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000515
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000516 `field' is an unparsed address header field, containing one or more
517 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000518 """
519 self.specials = '()<>@,:;.\"[]'
520 self.pos = 0
521 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000522 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000523 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000524 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
525 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
526 # syntax, so allow dots in phrases.
527 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000528 self.field = field
529 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000530
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000531 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000532 """Parse up to the start of the next address."""
533 while self.pos < len(self.field):
534 if self.field[self.pos] in self.LWS + '\n\r':
535 self.pos = self.pos + 1
536 elif self.field[self.pos] == '(':
537 self.commentlist.append(self.getcomment())
538 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000539
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000540 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000541 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000542
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000543 Returns a list containing all of the addresses.
544 """
Barry Warsawf1fd2822001-11-13 21:30:37 +0000545 result = []
546 while 1:
547 ad = self.getaddress()
548 if ad:
549 result += ad
550 else:
551 break
552 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000553
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000554 def getaddress(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000555 """Parse the next address."""
556 self.commentlist = []
557 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000558
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000559 oldpos = self.pos
560 oldcl = self.commentlist
561 plist = self.getphraselist()
Tim Peters0c9886d2001-01-15 01:18:21 +0000562
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000563 self.gotonext()
564 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000565
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000566 if self.pos >= len(self.field):
567 # Bad email address technically, no domain.
568 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000569 returnlist = [(' '.join(self.commentlist), plist[0])]
Tim Peters0c9886d2001-01-15 01:18:21 +0000570
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000571 elif self.field[self.pos] in '.@':
572 # email address is just an addrspec
573 # this isn't very efficient since we start over
574 self.pos = oldpos
575 self.commentlist = oldcl
576 addrspec = self.getaddrspec()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000577 returnlist = [(' '.join(self.commentlist), addrspec)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000578
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000579 elif self.field[self.pos] == ':':
580 # address is a group
581 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000582
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000583 fieldlen = len(self.field)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000584 self.pos = self.pos + 1
585 while self.pos < len(self.field):
586 self.gotonext()
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000587 if self.pos < fieldlen and self.field[self.pos] == ';':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000588 self.pos = self.pos + 1
589 break
590 returnlist = returnlist + self.getaddress()
Tim Peters0c9886d2001-01-15 01:18:21 +0000591
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000592 elif self.field[self.pos] == '<':
593 # Address is a phrase then a route addr
594 routeaddr = self.getrouteaddr()
Tim Peters0c9886d2001-01-15 01:18:21 +0000595
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000596 if self.commentlist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000597 returnlist = [(' '.join(plist) + ' (' + \
598 ' '.join(self.commentlist) + ')', routeaddr)]
599 else: returnlist = [(' '.join(plist), routeaddr)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000600
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000601 else:
602 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000603 returnlist = [(' '.join(self.commentlist), plist[0])]
Barry Warsaw8a578431999-01-14 19:59:58 +0000604 elif self.field[self.pos] in self.specials:
605 self.pos = self.pos + 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000606
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000607 self.gotonext()
608 if self.pos < len(self.field) and self.field[self.pos] == ',':
609 self.pos = self.pos + 1
610 return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000611
def getrouteaddr(self):
    """Parse a route address (Return-path value).

    The cursor must be on the opening '<'; otherwise nothing is consumed
    and None is returned.  Any "@domain" route entries and ':' separators
    are skipped, and the addr-spec that follows them is returned ('' if
    the closing '>' comes first).
    """
    if self.field[self.pos] != '<':
        return

    self.pos += 1
    self.gotonext()
    addrspec = ""
    in_route = False
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if in_route:
            # The domain after an '@' is route information: parse and drop it
            self.getdomain()
            in_route = False
        elif ch == '>':
            self.pos += 1
            break
        elif ch == '@':
            self.pos += 1
            in_route = True
        elif ch == ':':
            self.pos += 1
        else:
            addrspec = self.getaddrspec()
            # Step over the character following the addr-spec (the '>')
            self.pos += 1
            break
        self.gotonext()

    return addrspec
Tim Peters0c9886d2001-01-15 01:18:21 +0000643
def getaddrspec(self):
    """Parse an RFC 2822 addr-spec and return it as a string.

    The local part is assembled from atoms, dots and quoted strings (the
    quotes are put back around quoted strings).  If an '@' follows, the
    domain is parsed too and the result is "local@domain"; otherwise only
    the local part is returned.
    """
    parts = []

    self.gotonext()
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch == '.':
            parts.append(ch)
            self.pos += 1
        elif ch == '"':
            parts.append('"%s"' % self.getquote())
        elif ch in self.atomends:
            break
        else:
            parts.append(self.getatom())
        self.gotonext()

    local = ''.join(parts)
    if self.pos < len(self.field) and self.field[self.pos] == '@':
        self.pos += 1
        self.gotonext()
        return local + '@' + self.getdomain()
    return local
Tim Peters0c9886d2001-01-15 01:18:21 +0000667
def getdomain(self):
    """Get the complete domain name from an address.

    Whitespace is skipped, comments are collected into self.commentlist,
    and atoms, dots and domain literals are concatenated into the returned
    string.  Parsing stops at the first other character in self.atomends
    or at the end of the field.
    """
    pieces = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            pieces.append(self.getdomainliteral())
        elif ch == '.':
            self.pos += 1
            pieces.append('.')
        elif ch in self.atomends:
            break
        else:
            pieces.append(self.getatom())
    return ''.join(pieces)
Tim Peters0c9886d2001-01-15 01:18:21 +0000685
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.  If self is not
    looking at an instance of `beginchar' then getdelimited returns the
    empty string.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered.

    If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
    within the parsed fragment.

    Returns the text between the delimiters with backslash escapes
    resolved; the delimiters themselves are not included.  On return
    self.pos is just past the end delimiter (or at the end of the field
    if no end delimiter was found).
    """
    if self.field[self.pos] != beginchar:
        return ''

    slist = ['']
    quote = 0
    self.pos = self.pos + 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote == 1:
            # Previous character was a backslash: take this one literally
            slist.append(ch)
            quote = 0
        elif ch in endchars:
            self.pos = self.pos + 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            # getcomment() has already moved pos past the nested comment;
            # advancing again would swallow the next character (possibly
            # our own end delimiter).
            continue
        elif ch == '\\':
            quote = 1
        else:
            slist.append(ch)
        self.pos = self.pos + 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000721
def getquote(self):
    """Get a quote-delimited fragment from self's field.

    The fragment ends at the next unescaped '"' or at a carriage return;
    embedded comments are not recognized.
    """
    quotechar = '"'
    return self.getdelimited(quotechar, quotechar + '\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000725
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    The fragment ends at ')' or at a carriage return; nested comments are
    allowed.
    """
    opener, closers = '(', ')\r'
    return self.getdelimited(opener, closers, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000729
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal.

    Returns the literal with its square brackets put back around it.
    """
    inner = self.getdelimited('[', ']\r', 0)
    return '[%s]' % inner
Tim Peters0c9886d2001-01-15 01:18:21 +0000733
def getatom(self, atomends=None):
    """Parse an RFC 2822 atom.

    Consumes characters from the current position up to (not including)
    the first one found in `atomends' and returns them as a string.

    Optional atomends specifies a different set of end token delimiters
    (the default is to use self.atomends).  This is used e.g. in
    getphraselist() since phrase endings must not include the `.' (which
    is legal in phrases)."""
    if atomends is None:
        atomends = self.atomends

    start = end = self.pos
    limit = len(self.field)
    while end < limit and self.field[end] not in atomends:
        end += 1

    self.pos = end
    return self.field[start:end]
Tim Peters0c9886d2001-01-15 01:18:21 +0000752
def getphraselist(self):
    """Parse a sequence of RFC 2822 phrases.

    A phrase is a sequence of words, which are in turn either RFC 2822
    atoms or quoted-strings.  The words are returned as a list; the
    whitespace between them is dropped and any comments met on the way
    are appended to self.commentlist.  Parsing stops at the first
    character in self.phraseends that does not start a quoted string or
    a comment.
    """
    words = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.phraseends:
            break
        else:
            words.append(self.getatom(self.phraseends))

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000775
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed entries are kept in self.addresslist (the same entries that
    dump_address_pair() formats, i.e. (name, address) pairs).  The +, +=,
    - and -= operators combine two AddressLists as set union and set
    difference on those entries.
    """
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field yields an empty list without parsing
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place.  The filtered list is built completely
        # before it is stored, so this is safe even when other is self
        # (removing while iterating the same list skipped entries), and
        # every occurrence of an unwanted entry goes away, as in __sub__.
        unwanted = other.addresslist
        self.addresslist[:] = [x for x in self.addresslist
                               if x not in unwanted]
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000825
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is the bare address.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return ''.join(['"', name, '" <', address, '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000832
833# Parse a date field
834
# Month names, abbreviated first and then in full; parsedate_tz() maps an
# index above 12 back onto 1..12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    Returns (year, month, day, hour, minute, second, 0, 1, 0, tzoffset),
    or None when the string cannot be parsed.  tzoffset is the zone's
    offset from UTC in seconds, or None when no usable zone is present.
    Zone names are looked up in _timezones (which has no military zones
    other than Z); anything else is tried as a numeric +HHMM/-HHMM offset.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to index below
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time, e.g. "08:49:37+0100"
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            i = s.find('-')
            if i > 0:
                # Keep the sign, it is part of the offset
                data[3:] = [s[:i], s[i:]]
            else:
                data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the day and month are the other way round
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith()/slicing instead of indexing: a field can be empty here
    # (e.g. after splitting a malformed RFC 850 date) and must lead to
    # None, not to an IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are swapped
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
935
Guido van Rossumb6775db1994-08-01 11:34:53 +0000936
def parsedate(data):
    """Convert a time string to a time tuple.

    This is parsedate_tz() without the trailing timezone offset: the first
    nine items of its result, or whatever non-tuple value (None) it
    returned.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000943
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000944
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  Without an offset the
    date is interpreted as local time.  With one, the fields are converted
    as if they were UTC and the offset is then subtracted.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Do the arithmetic directly in UTC.  Going through time.mktime() and
    # correcting by time.timezone gives wrong answers whenever the local
    # zone's offset on that date is not the current standard offset.
    import calendar
    return calendar.timegm(data[:6]) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000953
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; the current time is
    used when it is omitted.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    daynames = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    monthnames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[weekday], day, monthnames[month - 1],
        year, hour, minute, second)
Guido van Rossum247a78a1999-04-19 18:04:38 +0000973
Guido van Rossumb6775db1994-08-01 11:34:53 +0000974
975# When used as script, run a small test program.
976# The first command line argument must be a filename containing one
977# message in RFC-822 format.
978
979if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000980 import sys, os
981 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
982 if sys.argv[1:]: file = sys.argv[1]
983 f = open(file, 'r')
984 m = Message(f)
985 print 'From:', m.getaddr('from')
986 print 'To:', m.getaddrlist('to')
987 print 'Subject:', m.getheader('subject')
988 print 'Date:', m.getheader('date')
989 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000990 tz = date[-1]
991 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000992 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000993 print 'ParsedDate:', time.asctime(date),
994 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000995 hhmm, ss = divmod(hhmmss, 60)
996 hh, mm = divmod(hhmm, 60)
997 print "%+03d%02d" % (hh, mm),
998 if ss: print ".%02d" % ss,
999 print
1000 else:
1001 print 'ParsedDate:', None
1002 m.rewindbody()
1003 n = 0
1004 while f.readline():
1005 n = n + 1
1006 print 'Lines:', n
1007 print '-'*70
1008 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +00001009 if 'Date' in m: print 'Date =', m['Date']
1010 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001011 print 'keys =', m.keys()
1012 print 'values =', m.values()
1013 print 'items =', m.items()