blob: 9093beb211f9776c6446e93866c13fa67127541a [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossume894fc01998-06-11 13:58:40 +000016This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000023
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
that initial tell() call when passing in an unseekable object such as
a file object created from a socket object.  If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
Tim Peters0c9886d2001-01-15 01:18:21 +000031called once; if this raises an exception, seekable is reset to 0. For
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000058# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000059
Guido van Rossumb6775db1994-08-01 11:34:53 +000060import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000061
Skip Montanaro0de65802001-02-15 22:15:14 +000062__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
Guido van Rossum01ca3361992-07-13 14:28:59 +000063
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from the file-like object handed to the
    constructor.  Afterwards they are available in two forms:

    self.headers -- the raw header lines, in file order, including
                    continuation lines and line terminators.
    self.dict    -- lower-cased header name -> stripped value; when a
                    header occurs more than once the last one wins.

    The instance also offers a dictionary-like interface on top of
    self.dict (len(), indexing, `in', get(), setdefault(), keys(), ...).
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        fp       -- any object with a readline() method.
        seekable -- if exactly 1 (the default), fp.tell() is called once
                    as a probe and seekable is reset to 0 when the probe
                    fails.  Any other value is taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except Exception:
                # Best effort: any ordinary failure (missing method,
                # I/O error, ...) means "not seekable".  Unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick a way to push back an offending line: prefer an explicit
        # unread() method, otherwise remember each line's offset via tell().
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognize MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current run.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the current value.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate the addresses of consecutive headers.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime().
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so the list object itself is updated in place.
        self.headers[:] = kept

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def get(self, name, default=None):
        """Return the value of header `name', or `default' if absent."""
        name = name.lower()
        if name in self.dict:
            return self.dict[name]
        else:
            return default

    def setdefault(self, name, default=''):
        """Return the value of header `name', adding it first if absent.

        When the header is missing it is stored with value `default'
        (a false `default' is stored as the empty string) and appended
        to the raw-headers list.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            default = default or ""
            self.dict[lowername] = default
            text = "%s: %s" % (name, default)
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined into a single string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000468
469
470# Utility functions
471# -----------------
472
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000473# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000474# XXX The inverses of the parse functions may also be useful.
475
Guido van Rossum01ca3361992-07-13 14:28:59 +0000476
def unquote(str):
    """Remove quotes from a string.

    If `str' is longer than one character and is enclosed either in a
    pair of double quotes or in angle brackets, return the text between
    them.  Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    enclosure = (str[0], str[-1:])
    if enclosure in (('"', '"'), ('<', '>')):
        return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485
486
def quote(str):
    """Prepare a string for use inside a quoted string.

    Backslashes and double quotes are turned into backslash escape
    sequences.  The surrounding double quotes are NOT added; the
    caller has to supply them.
    """
    # Backslashes must be escaped first, otherwise the backslashes
    # introduced for the double quotes would themselves be doubled.
    return str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000490
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000491
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when no
    address can be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000500
501
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000502class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000503 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000504
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000505 To understand what this class does, it helps to have a copy of
506 RFC-822 in front of you.
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000507
508 Note: this class interface is deprecated and may be removed in the future.
509 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000510 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000511
def __init__(self, field):
    """Initialize a new instance.

    `field' is an unparsed address header field, containing
    one or more addresses.
    """
    # Parser state: the text being scanned, the scan position and the
    # comments collected so far.
    self.field = field
    self.pos = 0
    self.commentlist = []
    # Character classes used by the scanning methods.
    self.specials = '()<>@,:;.\"[]'
    self.LWS = ' \t'
    self.CR = '\r\n'
    self.atomends = self.specials + self.LWS + self.CR
Tim Peters0c9886d2001-01-15 01:18:21 +0000525
def gotonext(self):
    """Parse up to the start of the next address.

    Advances self.pos past whitespace and line terminators; each
    parenthesized comment met on the way is read with getcomment()
    and appended to self.commentlist.
    """
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS + '\n\r':
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        else:
            break
Tim Peters0c9886d2001-01-15 01:18:21 +0000534
def getaddrlist(self):
    """Parse all addresses.

    Returns a list containing all of the addresses.  Parsing stops
    at the first call to getaddress() that yields an empty result.
    """
    # Iterate instead of recursing once per address: a header carrying
    # many addresses would otherwise exhaust the recursion limit, and
    # repeated list concatenation would be quadratic.
    result = []
    while 1:
        ad = self.getaddress()
        if not ad:
            break
        result.extend(ad)
    return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000544
def getaddress(self):
    """Parse the next address.

    Returns a list of (name-or-comment, address) tuples: one tuple for
    a single address, any number for a group, and an empty list when
    nothing could be parsed.  A trailing comma after the address is
    consumed.
    """
    self.commentlist = []
    self.gotonext()

    # Remember where we are so the addr-spec branch below can rescan.
    # NOTE(review): oldcl refers to the same list object as
    # self.commentlist, so comments appended in place by gotonext()
    # are not undone when it is "restored" below.
    oldpos = self.pos
    oldcl = self.commentlist
    plist = self.getphraselist()

    self.gotonext()
    returnlist = []

    # The character following the phrase list decides what kind of
    # address this is.
    if self.pos >= len(self.field):
        # Bad email address technically, no domain.
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]

    elif self.field[self.pos] in '.@':
        # email address is just an addrspec
        # this isn't very efficient since we start over
        self.pos = oldpos
        self.commentlist = oldcl
        addrspec = self.getaddrspec()
        returnlist = [(' '.join(self.commentlist), addrspec)]

    elif self.field[self.pos] == ':':
        # address is a group
        returnlist = []

        fieldlen = len(self.field)
        self.pos = self.pos + 1
        # Collect member addresses recursively until the closing ';'
        # or the end of the field.
        while self.pos < len(self.field):
            self.gotonext()
            if self.pos < fieldlen and self.field[self.pos] == ';':
                self.pos = self.pos + 1
                break
            returnlist = returnlist + self.getaddress()

    elif self.field[self.pos] == '<':
        # Address is a phrase then a route addr
        routeaddr = self.getrouteaddr()

        # Comments, if any, are appended in parentheses to the phrase.
        if self.commentlist:
            returnlist = [(' '.join(plist) + ' (' + \
                     ' '.join(self.commentlist) + ')', routeaddr)]
        else: returnlist = [(' '.join(plist), routeaddr)]

    else:
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]
        elif self.field[self.pos] in self.specials:
            # Step over a stray special character so the scan position
            # still advances.
            self.pos = self.pos + 1

    # Swallow the comma separating this address from the next one.
    self.gotonext()
    if self.pos < len(self.field) and self.field[self.pos] == ',':
        self.pos = self.pos + 1
    return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000602
def getrouteaddr(self):
    """Parse a route address (Return-path value).

    This method just skips all the route stuff and returns the addrspec.

    Returns None when the parser is not positioned at a '<' (including
    when the field is already exhausted) or when no addr-spec is found
    before the closing '>' or the end of the field.
    """
    if self.pos >= len(self.field) or self.field[self.pos] != '<':
        return None

    expectroute = 0
    self.pos = self.pos + 1
    self.gotonext()
    adlist = None
    while self.pos < len(self.field):
        if expectroute:
            # The previous character was '@': skip the route domain.
            self.getdomain()
            expectroute = 0
        elif self.field[self.pos] == '>':
            self.pos = self.pos + 1
            break
        elif self.field[self.pos] == '@':
            self.pos = self.pos + 1
            expectroute = 1
        elif self.field[self.pos] == ':':
            # End of the route part; the addr-spec follows.
            self.pos = self.pos + 1
        else:
            adlist = self.getaddrspec()
            # Step over the character that ended the addr-spec
            # (normally the closing '>').
            self.pos = self.pos + 1
            break
        self.gotonext()

    return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000635
def getaddrspec(self):
    """Parse an RFC-822 addr-spec.

    Collects the local part (atoms, dots and quoted strings) and, if
    an '@' follows, the domain.  Returns the pieces joined into one
    string; without an '@' only the local part is returned.
    """
    parts = []

    self.gotonext()
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch == '.':
            parts.append('.')
            self.pos += 1
        elif ch == '"':
            parts.append('"%s"' % self.getquote())
        elif ch in self.atomends:
            break
        else:
            parts.append(self.getatom())
        self.gotonext()

    followed_by_at = (self.pos < len(self.field)
                      and self.field[self.pos] == '@')
    if not followed_by_at:
        return ''.join(parts)

    parts.append('@')
    self.pos += 1
    self.gotonext()
    local_and_at = ''.join(parts)
    return local_and_at + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000659
def getdomain(self):
    """Get the complete domain name from an address.

    Whitespace is skipped, comments are moved to self.commentlist,
    and atoms, dots and domain literals are concatenated until some
    other atom-ending character or the end of the field is reached.
    """
    pieces = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            pieces.append(self.getdomainliteral())
        elif ch == '.':
            self.pos += 1
            pieces.append('.')
        elif ch in self.atomends:
            break
        else:
            pieces.append(self.getatom())
    return ''.join(pieces)
Tim Peters0c9886d2001-01-15 01:18:21 +0000677
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.
    If self is not looking at an instance of `beginchar' (including
    when self.pos is already at the end of the field) then
    getdelimited returns the empty string and consumes nothing.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered; the terminator is
    consumed but not returned.

    If `allowcomments' is non-zero, embedded RFC-822 comments
    are allowed within the parsed fragment; their text (without the
    parentheses) is included in the result.

    A backslash quotes the following character: the backslash itself
    is dropped and the next character is taken literally.
    """
    # Guard the index so that being at end-of-field is treated as "not
    # looking at beginchar" rather than raising IndexError.
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = []
    quote = 0
    self.pos = self.pos + 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote:
            slist.append(ch)
            quote = 0
        elif ch in endchars:
            self.pos = self.pos + 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            # getcomment() has already advanced self.pos past the nested
            # comment; advancing again would skip the next character.
            continue
        elif ch == '\\':
            quote = 1
        else:
            slist.append(ch)
        self.pos = self.pos + 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000713
def getquote(self):
    """Get a quote-delimited fragment from self's field.

    The fragment starts at a double quote and ends at the next
    unescaped double quote or carriage return; embedded comments are
    not recognised.
    """
    opener = '"'
    closers = '"\r'
    return self.getdelimited(opener, closers, 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000717
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    The fragment starts at '(' and ends at the next unescaped ')' or
    carriage return; nested comments are allowed.
    """
    opener = '('
    closers = ')\r'
    return self.getdelimited(opener, closers, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000721
def getdomainliteral(self):
    """Parse an RFC-822 domain-literal.

    Returns the bracketed text with the square brackets restored
    around it; comments are not recognised inside the literal.
    """
    inner = self.getdelimited('[', ']\r', 0)
    return '[%s]' % inner
Tim Peters0c9886d2001-01-15 01:18:21 +0000725
def getatom(self):
    """Parse an RFC-822 atom.

    Consumes characters from self.pos up to (not including) the first
    character found in self.atomends, or the end of the field, and
    returns them as a string (possibly empty).
    """
    field = self.field
    start = self.pos
    limit = len(field)

    while self.pos < limit and field[self.pos] not in self.atomends:
        self.pos += 1

    return field[start:self.pos]
Tim Peters0c9886d2001-01-15 01:18:21 +0000737
def getphraselist(self):
    """Parse a sequence of RFC-822 phrases.

    A phrase is a sequence of words, each of which is either an
    RFC-822 atom or a quoted-string.  The words are returned as a
    list; whitespace between them is dropped and any comments met on
    the way are appended to self.commentlist.  Scanning stops at the
    first other character found in self.atomends.
    """
    words = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        # '"' and '(' must be tested before the generic atomends check.
        if ch in self.LWS:
            self.pos += 1
        elif ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.atomends:
            break
        else:
            words.append(self.getatom())

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000759
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed entries live in self.addresslist.  The arithmetic
    operators treat two AddressLists as sets of entries: + and += form
    the union, - and -= the difference.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        if not field:
            # None or an empty header: nothing to parse.
            self.addresslist = []
        else:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        # Set union: everything in self, then the entries of other that
        # self does not already contain.
        result = AddressList(None)
        extra = [entry for entry in other.addresslist
                 if entry not in self.addresslist]
        result.addresslist = self.addresslist[:] + extra
        return result

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference: the entries of self that other lacks.
        result = AddressList(None)
        result.addresslist.extend([entry for entry in self.addresslist
                                   if entry not in other.addresslist])
        return result

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000809
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise
    it is the bare address.
    """
    name = pair[0]
    if not name:
        return pair[1]
    return ''.join(['"', name, '" <', pair[1], '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000816
817# Parse a date field
818
# Abbreviated month names come first and the full names follow in the
# same order, so parsedate_tz() can map an index above 12 back onto the
# 1..12 range by subtracting 12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values are signed offsets written as hours*100 + minutes (e.g. -500
# means five hours behind UTC); parsedate_tz() converts them to seconds.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    Returns (year, month, day, hour, minute, second, 0, 0, 0, tzoffset)
    where tzoffset is the zone's offset from UTC in seconds, or None
    when the string carries no recognisable zone.  Returns None when
    the string cannot be parsed as a date at all (including empty or
    whitespace-only input).

    Zone names are looked up in _timezones; of the military zones only
    Z is recognised.  Numeric zones (+hhmm / -hhmm) are accepted either
    as a separate word or glued to the end of the time.  The year is
    returned exactly as written (two-digit years are not expanded).
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time: "08:49:37+0100".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone; int() accepts both '+' and '-'.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    [dd, mm, yy, tm, tz] = data[:5]
    if not (dd and mm and yy):
        # An RFC 850 date such as "-Nov-94" leaves empty pieces behind;
        # the indexing below needs non-empty strings.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month and day are the other way round.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Year and time are swapped.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            return None
    if not yy[0].isdigit():
        # Year and zone are swapped.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    try:
        tzoffset = _timezones[tz]
    except KeyError:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown or missing zone: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps this integer arithmetic on every Python version.
        hours, minutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (hours * 3600 + minutes * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
917
Guido van Rossumb6775db1994-08-01 11:34:53 +0000918
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing timezone offset dropped;
    a non-tuple result (None for an unparseable date) is passed through
    unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000925
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000926
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the date had no
    zone information, in which case the fields are interpreted as local
    time.  Returns seconds since the epoch as a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Treat the fields as UTC and subtract the zone offset.  Going
    # through time.mktime() and time.timezone instead would make the
    # result depend on the local zone's DST and historical rules.
    import calendar
    return float(calendar.timegm(data) - data[9])
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000935
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is seconds since the epoch; when omitted or None the
    current time is used.  The day and month names are always the
    English abbreviations, whatever the current locale is.
    """
    if timeval is None:
        timeval = time.time()
    gmt = time.gmtime(timeval)
    # strftime's %a and %b follow the process locale, so the names are
    # taken from fixed tables instead.  gmt[6] is the weekday with
    # Monday == 0 and gmt[1] is the month in 1..12.
    weekday = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[gmt[6]]
    month = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[gmt[1] - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        weekday, gmt[2], month, gmt[0], gmt[3], gmt[4], gmt[5])
945
Guido van Rossumb6775db1994-08-01 11:34:53 +0000946
947# When used as script, run a small test program.
948# The first command line argument must be a filename containing one
949# message in RFC-822 format.
950
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    # Every print call takes a single pre-formatted string so that the
    # output is the same whether print is a statement or a function.
    import os
    import sys
    path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        path = sys.argv[1]
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        # getdate_tz() is defined elsewhere; presumably it returns None
        # for a missing or unparseable Date header, so test the result
        # before indexing it.
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split the magnitude and put the sign back afterwards;
                # divmod on a negative offset rounds towards -infinity
                # and would turn -0530 into -0630.
                if tz < 0:
                    sign = '-'
                else:
                    sign = '+'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%s%02d%02d" % (sign, hh, mm))
                if ss:
                    parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: None')
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %s' % (n,))
        print('-' * 70)
        print('len = %s' % (len(m),))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
985 print 'items =', m.items()