blob: 14fd2a8cec069f5fca9df65cabbda2ffd2e313d5 [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossume894fc01998-06-11 13:58:40 +000016This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000023
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
28that initial tell() call when passing in an unseekable object such as
29a file object created from a socket object. If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
Tim Peters0c9886d2001-01-15 01:18:21 +000031called once; if this raises an exception, seekable is reset to 0. For
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000058# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000059
Guido van Rossumb6775db1994-08-01 11:34:53 +000060import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000061
# Names exported by ``from rfc822 import *``.
__all__ = ["Message", "AddressList", "parsedate", "parsedate_tz", "mktime_tz"]

# Line values that terminate the header section; Message.islast() tests
# membership in this tuple.
_blanklines = ('\r\n', '\n')
Guido van Rossum92457b91995-06-22 19:06:57 +000065
66
class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  They are kept both as the raw list of lines (self.headers)
    and as a dictionary keyed by lower-cased header name (self.dict); the
    dictionary only remembers the *last* header of each name.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' tells
        whether fp supports tell() and seek(): when it is exactly 1 (the
        default) this is probed by calling fp.tell() once; 0 disables all
        use of tell()/seek(); any other true value is trusted untested.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too).  This is a deliberate
            # best-effort probe, so any ordinary failure just means "not
            # seekable"; KeyboardInterrupt/SystemExit still propagate.
            try:
                fp.tell()
            except Exception:
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick a push-back strategy: prefer fp.unread(), else remember
        # the position of each line so it can be seek()ed back to.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: the lower-cased text before the first colon), or None if
        the line is not a header.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognize MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        The default implementation never skips a line (returns None).
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        matches = self.getfirstmatchingheader(name)
        if not matches:
            return None
        # Drop "<name>:" from the first line only.
        matches[0] = matches[0][len(name) + 1:]
        return ''.join(matches)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' if it doesn't exist.  This uses the dictionary
        version which finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the header
        is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i in range(len(self.headers)):
            line = self.headers[i]
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so the remaining indexes stay valid.
        doomed.reverse()
        for i in doomed:
            del self.headers[i]

    def get(self, name, default=""):
        """Get the (last) value of a header, or `default' if it is absent.

        Unlike getheader(), the default here is the empty string.
        """
        return self.dict.get(name.lower(), default)

    def setdefault(self, name, default=""):
        """Return the value of a header, adding it as `default' if absent."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Support ``name in message`` (same test as has_key())."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names, as a list."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values, as a list."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000467
468
469# Utility functions
470# -----------------
471
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000472# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000473# XXX The inverses of the parse functions may also be useful.
474
Guido van Rossum01ca3361992-07-13 14:28:59 +0000475
def unquote(str):
    """Remove quotes from a string.

    If `str' is at least two characters long and is wrapped in a matching
    pair of double quotes ("...") or angle brackets (<...>), return it
    without that outer pair; otherwise return it unchanged.  Backslash
    escapes inside the string are not interpreted.
    """
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1]
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000484
485
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes in `str' are each prefixed with a
    backslash.  No surrounding quote characters are added; the caller
    supplies those.
    """
    # Backslashes must be doubled first so the ones added for the double
    # quotes are not escaped again.
    return str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000489
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000490
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none is
    found the result is (None, None).
    """
    addresses = AddrlistClass(address).getaddrlist()
    if not addresses:
        return (None, None)
    return addresses[0]
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000499
500
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000501class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000503
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000504 To understand what this class does, it helps to have a copy of
505 RFC-822 in front of you.
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000506
507 Note: this class interface is deprecated and may be removed in the future.
508 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000509 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000510
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000511 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000512 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000513
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000514 `field' is an unparsed address header field, containing
515 one or more addresses.
516 """
517 self.specials = '()<>@,:;.\"[]'
518 self.pos = 0
519 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000520 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000521 self.atomends = self.specials + self.LWS + self.CR
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000522 self.field = field
523 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000524
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000525 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000526 """Parse up to the start of the next address."""
527 while self.pos < len(self.field):
528 if self.field[self.pos] in self.LWS + '\n\r':
529 self.pos = self.pos + 1
530 elif self.field[self.pos] == '(':
531 self.commentlist.append(self.getcomment())
532 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000533
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000534 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000535 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000536
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000537 Returns a list containing all of the addresses.
538 """
539 ad = self.getaddress()
540 if ad:
541 return ad + self.getaddrlist()
542 else: return []
Tim Peters0c9886d2001-01-15 01:18:21 +0000543
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000544 def getaddress(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000545 """Parse the next address."""
546 self.commentlist = []
547 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000548
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000549 oldpos = self.pos
550 oldcl = self.commentlist
551 plist = self.getphraselist()
Tim Peters0c9886d2001-01-15 01:18:21 +0000552
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000553 self.gotonext()
554 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000555
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000556 if self.pos >= len(self.field):
557 # Bad email address technically, no domain.
558 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000559 returnlist = [(' '.join(self.commentlist), plist[0])]
Tim Peters0c9886d2001-01-15 01:18:21 +0000560
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000561 elif self.field[self.pos] in '.@':
562 # email address is just an addrspec
563 # this isn't very efficient since we start over
564 self.pos = oldpos
565 self.commentlist = oldcl
566 addrspec = self.getaddrspec()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000567 returnlist = [(' '.join(self.commentlist), addrspec)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000568
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000569 elif self.field[self.pos] == ':':
570 # address is a group
571 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000572
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000573 fieldlen = len(self.field)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000574 self.pos = self.pos + 1
575 while self.pos < len(self.field):
576 self.gotonext()
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000577 if self.pos < fieldlen and self.field[self.pos] == ';':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000578 self.pos = self.pos + 1
579 break
580 returnlist = returnlist + self.getaddress()
Tim Peters0c9886d2001-01-15 01:18:21 +0000581
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000582 elif self.field[self.pos] == '<':
583 # Address is a phrase then a route addr
584 routeaddr = self.getrouteaddr()
Tim Peters0c9886d2001-01-15 01:18:21 +0000585
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000586 if self.commentlist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000587 returnlist = [(' '.join(plist) + ' (' + \
588 ' '.join(self.commentlist) + ')', routeaddr)]
589 else: returnlist = [(' '.join(plist), routeaddr)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000590
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000591 else:
592 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000593 returnlist = [(' '.join(self.commentlist), plist[0])]
Barry Warsaw8a578431999-01-14 19:59:58 +0000594 elif self.field[self.pos] in self.specials:
595 self.pos = self.pos + 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000596
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000597 self.gotonext()
598 if self.pos < len(self.field) and self.field[self.pos] == ',':
599 self.pos = self.pos + 1
600 return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000601
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000602 def getrouteaddr(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000603 """Parse a route address (Return-path value).
Tim Peters0c9886d2001-01-15 01:18:21 +0000604
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000605 This method just skips all the route stuff and returns the addrspec.
606 """
607 if self.field[self.pos] != '<':
608 return
Tim Peters0c9886d2001-01-15 01:18:21 +0000609
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000610 expectroute = 0
611 self.pos = self.pos + 1
612 self.gotonext()
Guido van Rossum9e43adb1998-03-03 16:17:52 +0000613 adlist = None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000614 while self.pos < len(self.field):
615 if expectroute:
616 self.getdomain()
617 expectroute = 0
618 elif self.field[self.pos] == '>':
619 self.pos = self.pos + 1
620 break
621 elif self.field[self.pos] == '@':
622 self.pos = self.pos + 1
623 expectroute = 1
624 elif self.field[self.pos] == ':':
625 self.pos = self.pos + 1
626 expectaddrspec = 1
627 else:
628 adlist = self.getaddrspec()
629 self.pos = self.pos + 1
630 break
631 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000632
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000633 return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000634
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000635 def getaddrspec(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000636 """Parse an RFC-822 addr-spec."""
637 aslist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000638
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000639 self.gotonext()
640 while self.pos < len(self.field):
641 if self.field[self.pos] == '.':
642 aslist.append('.')
643 self.pos = self.pos + 1
644 elif self.field[self.pos] == '"':
Guido van Rossumb1844871999-06-15 18:06:20 +0000645 aslist.append('"%s"' % self.getquote())
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000646 elif self.field[self.pos] in self.atomends:
647 break
648 else: aslist.append(self.getatom())
649 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000650
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000651 if self.pos >= len(self.field) or self.field[self.pos] != '@':
Guido van Rossumc80f1822000-12-15 15:37:48 +0000652 return ''.join(aslist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000653
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000654 aslist.append('@')
655 self.pos = self.pos + 1
656 self.gotonext()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000657 return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000658
def getdomain(self):
    """Collect the domain part of an address and return it as a string.

    Linear whitespace is skipped, comments are moved to
    self.commentlist, and domain literals, dots and atoms are
    concatenated.  Scanning stops at the end of the field or at the
    first other character found in self.atomends.
    """
    parts = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos = self.pos + 1
            continue
        if ch == '(':
            self.commentlist.append(self.getcomment())
            continue
        if ch == '[':
            parts.append(self.getdomainliteral())
            continue
        if ch == '.':
            # Tested before the atomends check below so that a dot is
            # kept as part of the domain.
            self.pos = self.pos + 1
            parts.append('.')
            continue
        if ch in self.atomends:
            break
        parts.append(self.getatom())
    return ''.join(parts)
Tim Peters0c9886d2001-01-15 01:18:21 +0000676
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.  If self is
    not looking at an instance of `beginchar' (which includes being at
    the end of the field) then getdelimited returns the empty string
    and leaves the position alone.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered; the terminator is
    consumed but not returned.  Reaching the end of the field also
    stops parsing.

    If `allowcomments' is non-zero, embedded RFC-822 comments are
    allowed within the parsed fragment; the text of a nested comment
    is included in the result without its parentheses.

    A backslash quotes the character after it: that character is taken
    literally and the backslash itself is dropped.
    """
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = []
    quote = 0
    self.pos = self.pos + 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote:
            slist.append(ch)
            quote = 0
        elif ch in endchars:
            self.pos = self.pos + 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            # getcomment() has already moved self.pos past the nested
            # comment; advancing again would silently drop the next
            # character (possibly our own closing delimiter).
            continue
        elif ch == '\\':
            quote = 1
        else:
            slist.append(ch)
        self.pos = self.pos + 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000712
def getquote(self):
    """Return the double-quoted fragment at the current position.

    Delegates to getdelimited(); a closing double quote or a carriage
    return ends the fragment, and embedded comments are not recognized.
    """
    terminators = '"\r'
    return self.getdelimited('"', terminators, 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000716
def getcomment(self):
    """Return the parenthesized fragment at the current position.

    Delegates to getdelimited(); a closing parenthesis or a carriage
    return ends the fragment, and nested comments are allowed.
    """
    terminators = ')\r'
    return self.getdelimited('(', terminators, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000720
def getdomainliteral(self):
    """Parse an RFC-822 domain-literal and return it in square brackets.

    Delegates to getdelimited(); a closing bracket or a carriage return
    ends the literal, and embedded comments are not recognized.
    """
    inner = self.getdelimited('[', ']\r', 0)
    return '[%s]' % inner
Tim Peters0c9886d2001-01-15 01:18:21 +0000724
def getatom(self):
    """Parse an RFC-822 atom.

    Returns the run of characters from the current position up to, but
    not including, the first character found in self.atomends (or the
    end of the field), and leaves the position just after that run.
    """
    field = self.field
    start = end = self.pos
    limit = len(field)
    while end < limit and field[end] not in self.atomends:
        end = end + 1
    self.pos = end
    return field[start:end]
Tim Peters0c9886d2001-01-15 01:18:21 +0000736
def getphraselist(self):
    """Parse a sequence of RFC-822 phrases into a list of words.

    Each word is either an atom or the contents of a quoted string.
    Linear whitespace between words is dropped and comments are moved
    to self.commentlist.  Scanning stops at the end of the field or at
    the first other character found in self.atomends.
    """
    words = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos = self.pos + 1
            continue
        if ch == '"':
            words.append(self.getquote())
            continue
        if ch == '(':
            self.commentlist.append(self.getcomment())
            continue
        if ch in self.atomends:
            break
        words.append(self.getatom())

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000758
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed entries live in self.addresslist; str() renders each of
    them with dump_address_pair().  The arithmetic operators treat
    address lists as sets: `+' and `+=' form a union, `-' and `-=' a
    difference.
    """

    def __init__(self, field):
        """Parse `field' into self.addresslist; a false field gives []."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union.  Membership is tested against the growing result
        # (not just self) so that duplicates inside `other' are added
        # only once, which is what __iadd__ does as well.
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in newaddr.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place.  The survivors are computed first
        # and then written back with a slice assignment: removing items
        # while iterating would skip entries when `other' is `self',
        # and list.remove() would only drop the first of several
        # duplicates, disagreeing with __sub__.
        unwanted = other.addresslist
        self.addresslist[:] = [x for x in self.addresslist
                               if x not in unwanted]
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000808
def dump_address_pair(pair):
    """Render a (name, address) pair as one canonical address string.

    With a non-empty name the result is `"name" <address>'; otherwise
    it is just the address.
    """
    name = pair[0]
    if not name:
        return pair[1]
    return ''.join(['"', name, '" <', pair[1], '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000815
816# Parse a date field
817
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000818_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
819 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000820 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000821 'august', 'september', 'october', 'november', 'december']
822_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000823
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000824# The timezone table does not include the military time zones defined
825# in RFC822, other than Z. According to RFC1123, the description in
826# RFC822 gets the signs wrong, so we can't rely on any such time
827# zones. RFC1123 recommends that numeric timezone indicators be used
828# instead of timezone names.
829
Tim Peters0c9886d2001-01-15 01:18:21 +0000830_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000831 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000832 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000833 'CST': -600, 'CDT': -500, # Central
834 'MST': -700, 'MDT': -600, # Mountain
835 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000836 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000837
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000838
def parsedate_tz(data):
    """Convert a date string to a 10-tuple, or None if it cannot be parsed.

    The result is (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset).  The three zeros are placeholders, not computed values.
    tzoffset is the zone's offset from UTC in seconds (-0500 -> -18000),
    or None when no usable zone was found.  Zone names are looked up in
    _timezones, which does not contain the military zones other than Z;
    anything else must be numeric.

    An optional leading day name is skipped, a few alternative orderings
    of the day, month, year and time fields are tolerated, and so is the
    deprecated RFC 850 `dd-mon-yy' form.  The year is returned as
    written; two-digit years are not expanded.
    """
    fields = data.split()
    if not fields:
        # Empty or all-whitespace input: there is nothing to index into.
        return None
    if fields[0][-1] in (',', '.') or fields[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del fields[0]
    if len(fields) == 3: # RFC 850 date, deprecated
        stuff = fields[0].split('-')
        if len(stuff) == 3:
            fields = stuff + fields[1:]
    if len(fields) == 4:
        # The zone may be glued to the time, as in `08:49:37+0100'.
        s = fields[3]
        i = s.find('+')
        if i > 0:
            fields[3:] = [s[:i], s[i+1:]]
        else:
            fields.append('') # Dummy tz
    if len(fields) < 5:
        return None
    [dd, mm, yy, tm, tz] = fields[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month came first (`Nov 6 ...'); try the other order.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names sit in the second half of the table.
        mm = mm - 12
    # The checks below use slices rather than single-character indexing
    # so that an empty field falls through to the int() conversion and
    # yields None instead of raising IndexError.
    if dd[-1:] == ',':
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Year and time are transposed.
        yy, tm = tm, yy
    if yy[-1:] == ',':
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are transposed.
        yy, tz = tz, yy
    if tm[-1:] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = _timezones.get(tz.upper())
    if tzoffset is None:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone: report it as None rather than guessing.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps the arithmetic in integers.
        hours, minutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (hours*3600 + minutes*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
916
Guido van Rossumb6775db1994-08-01 11:34:53 +0000917
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing zone offset dropped; a
    result that is not a tuple (None for an unparsable string) is
    passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000924
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000925
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds east of UTC, or None.  With
    None the first eight fields are interpreted as local time and the
    C library decides about daylight saving.  Otherwise the broken-down
    time is converted as if it were UTC and the offset is subtracted.
    The result is a float in both cases.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # calendar.timegm() does pure UTC arithmetic.  Going through
    # time.mktime() and correcting by time.timezone instead depends on
    # the local zone database (the zone's standard offset may have been
    # different on the date in question) and on the platform's mktime()
    # accepting the date at all.
    import calendar
    return float(calendar.timegm(data[:9])) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000934
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp as accepted by time.gmtime(); when it is
    None the current time is used.  The day and month names are always
    the English abbreviations, whatever the process locale is.
    """
    if timeval is None:
        timeval = time.time()
    now = time.gmtime(timeval)
    # Index fixed tables instead of using strftime('%a'/'%b'), which
    # would emit locale-specific names that are not valid in a header.
    weekday = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[now[6]]
    month = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[now[1] - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        weekday, now[2], month, now[0], now[3], now[4], now[5])
944
Guido van Rossumb6775db1994-08-01 11:34:53 +0000945
946# When used as script, run a small test program.
947# The first command line argument must be a filename containing one
948# message in RFC-822 format.
949
950if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000951 import sys, os
952 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
953 if sys.argv[1:]: file = sys.argv[1]
954 f = open(file, 'r')
955 m = Message(f)
956 print 'From:', m.getaddr('from')
957 print 'To:', m.getaddrlist('to')
958 print 'Subject:', m.getheader('subject')
959 print 'Date:', m.getheader('date')
960 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000961 tz = date[-1]
962 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000963 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000964 print 'ParsedDate:', time.asctime(date),
965 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000966 hhmm, ss = divmod(hhmmss, 60)
967 hh, mm = divmod(hhmm, 60)
968 print "%+03d%02d" % (hh, mm),
969 if ss: print ".%02d" % ss,
970 print
971 else:
972 print 'ParsedDate:', None
973 m.rewindbody()
974 n = 0
975 while f.readline():
976 n = n + 1
977 print 'Lines:', n
978 print '-'*70
979 print 'len =', len(m)
980 if m.has_key('Date'): print 'Date =', m['Date']
981 if m.has_key('X-Nonsense'): pass
982 print 'keys =', m.keys()
983 print 'values =', m.values()
984 print 'items =', m.items()