blob: 89de69a528d5e95f46a2a9a1baa6fe4a91613b62 [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossume894fc01998-06-11 13:58:40 +000016This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000023
The optional `seekable' argument is provided as a workaround for
certain stdio libraries in which tell() discards buffered data before
discovering that the lseek() system call doesn't work.  For maximum
portability, you should set the seekable argument to zero to prevent
that initial tell() call when passing in an unseekable object such as
a file object created from a socket object.  If it is 1 on entry --
which it is by default -- the tell() method of the open file object is
called once; if this raises an exception, seekable is reset to 0.  For
other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000058# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000059
Guido van Rossumb6775db1994-08-01 11:34:53 +000060import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000061
62
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    Instance attributes set while reading the headers:

    fp             -- the input object the message was read from
    seekable       -- true if tell()/seek() on fp are believed to work
    startofheaders -- fp.tell() before the headers (None if unseekable)
    startofbody    -- fp.tell() after the headers (None if unseekable)
    headers        -- list of raw header lines, exactly as read
    dict           -- lower-cased header name -> stripped value; when a
                      header occurs several times the last one wins
    unixfrom       -- leading Unix "From " envelope line(s), or ''
    status         -- '' if the headers parsed cleanly, else a message
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is
        exactly 1 (the default), fp.tell() is tried once and seekable is
        reset to 0 when tell() is missing or raises IOError; any other
        value is taken at face value without probing.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too).  Only the failures
            # that mean "cannot seek" are caught, so unrelated errors
            # (including KeyboardInterrupt) are no longer masked.
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Push-back strategy for an illegal line: prefer fp.unread() if
        # the input object offers it, otherwise remember where each
        # line started (tell) so that we can seek back to it.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: everything before the first colon, lower-cased), or None
        if the line is not a header.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognize MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.  This base version never skips.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match;
                # continuation lines (leading whitespace) keep it.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        matches = self.getfirstmatchingheader(name)
        if not matches:
            return None
        # Drop "<name>:" from the first line only.
        matches[0] = matches[0][len(name) + 1:]
        return ''.join(matches)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the current value.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    raw.append(h[i+1:])
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        dropping = 0
        for line in self.headers:
            if line[:n].lower() == name:
                dropping = 1
            elif not line[:1].isspace():
                dropping = 0
            if not dropping:
                kept.append(line)
        # Slice-assign so that self.headers stays the same list object.
        self.headers[:] = kept

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000447
448
449# Utility functions
450# -----------------
451
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000452# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000453# XXX The inverses of the parse functions may also be useful.
454
Guido van Rossum01ca3361992-07-13 14:28:59 +0000455
def unquote(str):
    """Remove quotes from a string.

    If `str' is at least two characters long and is wrapped in a
    matching pair of double quotes or of angle brackets, return it
    without that pair; otherwise return it unchanged.  Backslash
    escapes inside the string are not undone.
    """
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1]
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000464
465
def quote(str):
    """Escape a string for use inside a quoted-string.

    Backslashes and double quotes are each preceded by a backslash.
    Despite the function's name, no surrounding quotes are added;
    the caller supplies those.
    """
    # Backslashes first, so the ones added for '"' are not doubled.
    return str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000470
Guido van Rossumb6775db1994-08-01 11:34:53 +0000471def parseaddr(address):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000472 """Parse an address into a (realname, mailaddr) tuple."""
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000473 a = AddrlistClass(address)
474 list = a.getaddrlist()
475 if not list:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000476 return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000477 else:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000478 return list[0]
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000479
480
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000481class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000482 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000483
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000484 To understand what this class does, it helps to have a copy of
485 RFC-822 in front of you.
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000486
487 Note: this class interface is deprecated and may be removed in the future.
488 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000489 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000490
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000491 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000492 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000493
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000494 `field' is an unparsed address header field, containing
495 one or more addresses.
496 """
497 self.specials = '()<>@,:;.\"[]'
498 self.pos = 0
499 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000500 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000501 self.atomends = self.specials + self.LWS + self.CR
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 self.field = field
503 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000504
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000505 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000506 """Parse up to the start of the next address."""
507 while self.pos < len(self.field):
508 if self.field[self.pos] in self.LWS + '\n\r':
509 self.pos = self.pos + 1
510 elif self.field[self.pos] == '(':
511 self.commentlist.append(self.getcomment())
512 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000513
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000514 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000515 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000516
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000517 Returns a list containing all of the addresses.
518 """
519 ad = self.getaddress()
520 if ad:
521 return ad + self.getaddrlist()
522 else: return []
Tim Peters0c9886d2001-01-15 01:18:21 +0000523
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000524 def getaddress(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000525 """Parse the next address."""
526 self.commentlist = []
527 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000528
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000529 oldpos = self.pos
530 oldcl = self.commentlist
531 plist = self.getphraselist()
Tim Peters0c9886d2001-01-15 01:18:21 +0000532
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000533 self.gotonext()
534 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000535
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000536 if self.pos >= len(self.field):
537 # Bad email address technically, no domain.
538 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000539 returnlist = [(' '.join(self.commentlist), plist[0])]
Tim Peters0c9886d2001-01-15 01:18:21 +0000540
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000541 elif self.field[self.pos] in '.@':
542 # email address is just an addrspec
543 # this isn't very efficient since we start over
544 self.pos = oldpos
545 self.commentlist = oldcl
546 addrspec = self.getaddrspec()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000547 returnlist = [(' '.join(self.commentlist), addrspec)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000548
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000549 elif self.field[self.pos] == ':':
550 # address is a group
551 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000552
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000553 fieldlen = len(self.field)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000554 self.pos = self.pos + 1
555 while self.pos < len(self.field):
556 self.gotonext()
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000557 if self.pos < fieldlen and self.field[self.pos] == ';':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000558 self.pos = self.pos + 1
559 break
560 returnlist = returnlist + self.getaddress()
Tim Peters0c9886d2001-01-15 01:18:21 +0000561
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000562 elif self.field[self.pos] == '<':
563 # Address is a phrase then a route addr
564 routeaddr = self.getrouteaddr()
Tim Peters0c9886d2001-01-15 01:18:21 +0000565
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000566 if self.commentlist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000567 returnlist = [(' '.join(plist) + ' (' + \
568 ' '.join(self.commentlist) + ')', routeaddr)]
569 else: returnlist = [(' '.join(plist), routeaddr)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000570
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000571 else:
572 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000573 returnlist = [(' '.join(self.commentlist), plist[0])]
Barry Warsaw8a578431999-01-14 19:59:58 +0000574 elif self.field[self.pos] in self.specials:
575 self.pos = self.pos + 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000576
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000577 self.gotonext()
578 if self.pos < len(self.field) and self.field[self.pos] == ',':
579 self.pos = self.pos + 1
580 return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000581
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000582 def getrouteaddr(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000583 """Parse a route address (Return-path value).
Tim Peters0c9886d2001-01-15 01:18:21 +0000584
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000585 This method just skips all the route stuff and returns the addrspec.
586 """
587 if self.field[self.pos] != '<':
588 return
Tim Peters0c9886d2001-01-15 01:18:21 +0000589
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000590 expectroute = 0
591 self.pos = self.pos + 1
592 self.gotonext()
Guido van Rossum9e43adb1998-03-03 16:17:52 +0000593 adlist = None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000594 while self.pos < len(self.field):
595 if expectroute:
596 self.getdomain()
597 expectroute = 0
598 elif self.field[self.pos] == '>':
599 self.pos = self.pos + 1
600 break
601 elif self.field[self.pos] == '@':
602 self.pos = self.pos + 1
603 expectroute = 1
604 elif self.field[self.pos] == ':':
605 self.pos = self.pos + 1
606 expectaddrspec = 1
607 else:
608 adlist = self.getaddrspec()
609 self.pos = self.pos + 1
610 break
611 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000612
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000613 return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000614
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000615 def getaddrspec(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000616 """Parse an RFC-822 addr-spec."""
617 aslist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000618
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000619 self.gotonext()
620 while self.pos < len(self.field):
621 if self.field[self.pos] == '.':
622 aslist.append('.')
623 self.pos = self.pos + 1
624 elif self.field[self.pos] == '"':
Guido van Rossumb1844871999-06-15 18:06:20 +0000625 aslist.append('"%s"' % self.getquote())
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000626 elif self.field[self.pos] in self.atomends:
627 break
628 else: aslist.append(self.getatom())
629 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000630
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000631 if self.pos >= len(self.field) or self.field[self.pos] != '@':
Guido van Rossumc80f1822000-12-15 15:37:48 +0000632 return ''.join(aslist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000633
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000634 aslist.append('@')
635 self.pos = self.pos + 1
636 self.gotonext()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000637 return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000638
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000639 def getdomain(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000640 """Get the complete domain name from an address."""
641 sdlist = []
642 while self.pos < len(self.field):
643 if self.field[self.pos] in self.LWS:
644 self.pos = self.pos + 1
645 elif self.field[self.pos] == '(':
646 self.commentlist.append(self.getcomment())
647 elif self.field[self.pos] == '[':
648 sdlist.append(self.getdomainliteral())
649 elif self.field[self.pos] == '.':
650 self.pos = self.pos + 1
651 sdlist.append('.')
652 elif self.field[self.pos] in self.atomends:
653 break
654 else: sdlist.append(self.getatom())
Guido van Rossumc80f1822000-12-15 15:37:48 +0000655 return ''.join(sdlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000656
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.
    If self is not looking at an instance of `beginchar' (which
    includes self.pos already being at the end of the field) then
    getdelimited returns the empty string and consumes nothing.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered; that character is
    consumed but not returned.  If none is found, the rest of the
    field is consumed.

    If `allowcomments' is non-zero, embedded RFC-822 comments
    are allowed within the parsed fragment; their text is included
    in the result without the enclosing parentheses.

    A backslash makes the character that follows it literal; the
    backslash itself is not returned.
    """
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = []
    quote = 0
    self.pos += 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote:
            # Character following a backslash: always taken literally.
            slist.append(ch)
            quote = 0
        elif ch in endchars:
            self.pos += 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            # getcomment() has already moved self.pos past the nested
            # comment; advancing again would swallow the next character.
            continue
        elif ch == '\\':
            quote = 1
        else:
            slist.append(ch)
        self.pos += 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000692
def getquote(self):
    """Return the double-quoted fragment at self.pos, quotes removed.

    The fragment ends at the closing double quote or at a carriage
    return; parenthesized comments are not recognized inside it.
    """
    opener = '"'
    closers = '"\r'
    return self.getdelimited(opener, closers, 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000696
def getcomment(self):
    """Return the parenthesized fragment at self.pos, parentheses removed.

    The fragment ends at the closing parenthesis or at a carriage
    return; nested comments are allowed inside it.
    """
    opener = '('
    closers = ')\r'
    return self.getdelimited(opener, closers, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000700
def getdomainliteral(self):
    """Parse an RFC-822 domain-literal and return it with its brackets.

    The literal ends at the closing bracket or at a carriage return;
    parenthesized comments are not recognized inside it.
    """
    inner = self.getdelimited('[', ']\r', 0)
    return '[%s]' % inner
Tim Peters0c9886d2001-01-15 01:18:21 +0000704
def getatom(self):
    """Parse an RFC-822 atom.

    Returns the run of characters from self.pos up to, but not
    including, the next character listed in self.atomends (or the end
    of the field), and leaves self.pos on that character.
    """
    start = self.pos
    while (self.pos < len(self.field)
           and self.field[self.pos] not in self.atomends):
        self.pos += 1
    return self.field[start:self.pos]
Tim Peters0c9886d2001-01-15 01:18:21 +0000716
def getphraselist(self):
    """Parse a sequence of RFC-822 phrases.

    Returns the list of words found from self.pos onward: atoms, and
    quoted-strings with their quotes removed.  Whitespace between
    words is dropped, so runs of it never reach the result, and
    parenthesized comments are moved onto self.commentlist.  Scanning
    stops at the end of the field or at the first other character
    listed in self.atomends.
    """
    words = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
            continue
        if ch == '"':
            words.append(self.getquote())
            continue
        if ch == '(':
            self.commentlist.append(self.getcomment())
            continue
        if ch in self.atomends:
            break
        words.append(self.getatom())

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000738
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed entries live in self.addresslist.  The +, -, += and -=
    operators treat two AddressLists as sets of entries (union and
    difference); len(), indexing, slicing and `in' operate on
    self.addresslist, and str() renders each entry with
    dump_address_pair(), separated by ", ".
    """
    def __init__(self, field):
        # A false `field' (None or '') yields an empty list without parsing.
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union: self's entries, then those of other not already in self
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference: self's entries that do not appear in other
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place.
        # Work from a snapshot of other's entries: with `a -= a' the list
        # being iterated would otherwise shrink underneath the loop and
        # entries would be skipped.  Every occurrence of an unwanted entry
        # is dropped, matching __sub__.  Slice assignment keeps the same
        # list object, so existing references to addresslist stay valid.
        unwanted = other.addresslist[:]
        self.addresslist[:] = [x for x in self.addresslist
                               if x not in unwanted]
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000788
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise
    the address is returned by itself.
    """
    name = pair[0]
    if not name:
        return pair[1]
    return '"' + name + '" <' + pair[1] + '>'
Guido van Rossumb6775db1994-08-01 11:34:53 +0000795
796# Parse a date field
797
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000798_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
799 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000800 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000801 'august', 'september', 'october', 'november', 'december']
802_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000803
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000804# The timezone table does not include the military time zones defined
805# in RFC822, other than Z. According to RFC1123, the description in
806# RFC822 gets the signs wrong, so we can't rely on any such time
807# zones. RFC1123 recommends that numeric timezone indicators be used
808# instead of timezone names.
809
Tim Peters0c9886d2001-01-15 01:18:21 +0000810_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000811 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000812 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000813 'CST': -600, 'CDT': -500, # Central
814 'MST': -700, 'MDT': -600, # Mountain
815 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000816 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000817
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000818
def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset), or None when the string cannot be parsed.  tzoffset is
    the zone's offset in seconds (-0500 -> -18000), or None when no
    usable zone was found.

    Zone names are looked up in _timezones; of the military zones only
    Z is recognized.  Numeric zones may carry either sign and may be
    glued directly onto the time (e.g. 08:49:37-0500).
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # Either there is no zone at all, or a numeric zone is attached
        # to the time without intervening whitespace.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone so int() sees it.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month precedes the day ("Nov 6 ...").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names sit in the second half of the table.
        mm = mm - 12
    # endswith() and [:1] are used instead of indexing because an RFC 850
    # style date with a missing component ("-Nov-94") yields empty tokens.
    if dd.endswith(','):
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Year and time are swapped ("... 08:49:37 1994").
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are swapped ("... GMT 1994").
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = _timezones.get(tz.upper())
    if tzoffset is None:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name or dummy zone: leave the offset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps the arithmetic integral however `/' divides.
        tzhours, tzminutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (tzhours*3600 + tzminutes*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
896
Guido van Rossumb6775db1994-08-01 11:34:53 +0000897
def parsedate(data):
    """Convert a time string to a time tuple.

    This is parsedate_tz() with the trailing zone offset dropped: the
    first nine items of its tuple are returned.  Anything parsedate_tz()
    returns that is not a tuple (None on failure) is passed through.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type( () ):
        return parsed
    return parsed[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000904
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000905
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the zone is
    unknown, in which case the fields are taken to be local time.
    """
    zone = data[9]
    fields = data[:8]
    if zone is not None:
        # Evaluate the fields with the DST flag off, then trade the
        # local zone's offset (time.timezone) for the stated one.
        stamp = time.mktime(fields + (0,))
        return stamp - zone - time.timezone
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(fields + (-1,))
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000914
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a time in seconds since the epoch; when it is None
    the current time is used.  The result is always expressed in GMT.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = \
          time.gmtime(timeval)[:7]
    # The day and month names are spelled out here rather than taken
    # from strftime('%a') / strftime('%b'): those follow the current
    # locale, while the header format requires the English names.
    dayname = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[weekday]
    monthname = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        dayname, day, monthname, year, hour, minute, second)
924
Guido van Rossumb6775db1994-08-01 11:34:53 +0000925
926# When used as script, run a small test program.
927# The first command line argument must be a filename containing one
928# message in RFC-822 format.
929
930if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000931 import sys, os
932 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
933 if sys.argv[1:]: file = sys.argv[1]
934 f = open(file, 'r')
935 m = Message(f)
936 print 'From:', m.getaddr('from')
937 print 'To:', m.getaddrlist('to')
938 print 'Subject:', m.getheader('subject')
939 print 'Date:', m.getheader('date')
940 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000941 tz = date[-1]
942 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000943 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000944 print 'ParsedDate:', time.asctime(date),
945 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000946 hhmm, ss = divmod(hhmmss, 60)
947 hh, mm = divmod(hhmm, 60)
948 print "%+03d%02d" % (hh, mm),
949 if ss: print ".%02d" % ss,
950 print
951 else:
952 print 'ParsedDate:', None
953 m.rewindbody()
954 n = 0
955 while f.readline():
956 n = n + 1
957 print 'Lines:', n
958 print '-'*70
959 print 'len =', len(m)
960 if m.has_key('Date'): print 'Date =', m['Date']
961 if m.has_key('X-Nonsense'): pass
962 print 'keys =', m.keys()
963 print 'values =', m.values()
964 print 'items =', m.items()