blob: 06b372ac22cba3268aa33cccf314a074615631a2 [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossumc7bb8571998-06-10 21:31:01 +000016This class can work with any input object that supports read and seek
17methods. The initialization method which parses the message will work
18even without seek capability, but in that case the final seek to the
19start of the delimiter line won't take place. However, if the input
20object has an `unread' method that can push back a line of input,
21Message will use that to push back the delimiter line. Thus this class
22can be used to parse messages coming from a buffered stream.
23
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
that initial tell() call when passing in an unseekable object such as
a file object created from a socket object. If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
31called once; if this raises an exception, seekable is reset to 0. For
32other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000058
Guido van Rossum01ca3361992-07-13 14:28:59 +000059import string
Guido van Rossumb6775db1994-08-01 11:34:53 +000060import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000061
62
# The two spellings of an empty line (CRLF and bare LF).  Message.islast()
# tests membership in this tuple to recognise the end of the headers.
_blanklines = ('\r\n', '\n')
Guido van Rossum92457b91995-06-22 19:06:57 +000064
65
class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from the file object as soon as the instance
    is created.  Afterwards the following attributes are available:

    headers  -- list of the raw header lines, in the order read
    dict     -- maps lower-cased header names to their stripped values
                (only the *last* header of each name is kept)
    unixfrom -- leading Unix "From " envelope line(s), or ''
    status   -- '' if the headers were read cleanly, else an error text
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        fp is an open file-like object positioned at the start of the
        message.  If seekable is exactly 1, fp.tell() is tried once and
        seekable is reset to 0 when that fails; any other value is
        taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too).
            try:
                fp.tell()
            except Exception:
                # Any ordinary failure means "not seekable"; interpreter
                # exits and keyboard interrupts are left alone.
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the file was found not to be seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        while 1:
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if self.islast(line):
                break
            elif headerseen and line[0] in ' \t':
                # It's a continuation line: fold it into the value of
                # the header it continues.
                lines.append(line)
                folded = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = folded.strip()
            elif ':' in line:
                # It's a header line.
                lines.append(line)
                colon = line.find(':')
                headerseen = line[:colon].lower()
                self.dict[headerseen] = line[colon+1:].strip()
            elif self.iscomment(line):
                pass
            else:
                # It's not a header line; stop here.
                if not headerseen:
                    self.status = 'No headers'
                else:
                    self.status = 'Bad header'
                # Try to undo the read.  hasattr() is used because most
                # file objects have no unread() method at all.
                if hasattr(self.fp, 'unread'):
                    self.fp.unread(line)
                elif self.seekable:
                    self.fp.seek(-len(line), 1)
                else:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.  This base implementation never
        treats a line as a comment.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        found = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A different header starts here; continuation lines
                # (leading whitespace) keep the current state.
                hit = 0
            if hit:
                found.append(line)
        return found

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        found = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == prefix:
                hit = 1
            if hit:
                found.append(line)
        return found

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop the "Name:" keyword from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        (None, None) is returned when the header is missing or
        contains no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  A missing
        header gives an empty list.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  None is returned if the header is
        missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        prefix = name + ':'
        n = len(prefix)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if not hit:
                kept.append(line)
        # Update the existing list object in place so that other
        # references to self.headers see the deletion too.
        self.headers[:] = kept

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines concatenated into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000373
374
375# Utility functions
376# -----------------
377
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000378# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000379# XXX The inverses of the parse functions may also be useful.
380
Guido van Rossum01ca3361992-07-13 14:28:59 +0000381
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters that is wrapped in a pair of
    double quotes or in angle brackets is returned without that outer
    pair; anything else is returned unchanged.
    """
    if len(str) > 1:
        ends = str[0] + str[-1]
        if ends == '""' or ends == '<>':
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000390
391
def quote(str):
    """Add quotes around a string.

    Backslashes are doubled first and double quotes are then preceded
    by a backslash, after which the result is wrapped in double quotes.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000401
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000402
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none
    can be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000411
412
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes text starting at the
    cursor and leaves the cursor just past what it consumed.  Comments
    met along the way are collected in self.commentlist.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Characters that terminate an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace (including line breaks) and collects any
        comments into self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where getaddress() finds nothing.
        """
        # Iterative rather than recursive so that very long address
        # lists cannot exhaust the interpreter's recursion limit.
        result = []
        ad = self.getaddress()
        while ad:
            result = result + ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty when
        nothing could be parsed, and possibly several entries when the
        address is a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (copy) of the comments seen so far, so that they can
        # really be restored if the address has to be parsed again.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos >= len(self.field):
                    # Group not terminated by ';' -- nothing left.
                    break
                if self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                start = self.pos
                returnlist = returnlist + self.getaddress()
                if self.pos == start:
                    # Unparseable character inside the group: skip it
                    # so that this loop always makes progress.
                    self.pos = self.pos + 1

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                realname = (' '.join(plist) + ' (' +
                            ' '.join(self.commentlist) + ')')
                returnlist = [(realname, routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the
        addrspec.  Returns None if the cursor is not at a '<' or if no
        addrspec is found before the closing '>'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # The domain of a route element is parsed and discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only; there is no domain to append.
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The text between the delimiters is returned, with
        backslash-quoting removed.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the
                # nested comment; do not skip another character.
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom."""
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  The words are returned as a
        list; comments are collected in self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000665
666
667# Parse a date field
668
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values are written in the same +/-HHMM form as numeric zones.

_timezones = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -400, 'ADT': -300,   # Atlantic (used in Canada)
    'EST': -500, 'EDT': -400,   # Eastern
    'CST': -600, 'CDT': -500,   # Central
    'MST': -700, 'MDT': -600,   # Mountain
    'PST': -800, 'PDT': -700,   # Pacific
    }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second,
    0, 0, 0, tzoffset), or None if the string cannot be parsed.
    The three zeros stand in for the weekday, day-of-year and DST
    fields.  tzoffset is the zone's offset from UTC in seconds
    (negative west of Greenwich), or None if the zone is missing or
    not recognized.  Zone names are looked up in _timezones; of the
    military zones only Z is known.
    """
    data = data.split()
    if not data:
        # Empty or all-whitespace input.
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time field with a '+'.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if mm not in _monthnames:
        # Try the "Mon DD HH:MM:SS YYYY" field order instead.
        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        # Not HH:MM or HH:MM:SS.
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the hours part an integer on every
        # Python version.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
750
Guido van Rossumb6775db1994-08-01 11:34:53 +0000751
def parsedate(data):
    """Convert a time string to a time tuple.

    The string is handed to parsedate_tz(); a tuple result is cut down
    to its first nine elements, and any other result is passed through
    unchanged.
    """
    parsed = parsedate_tz(data)
    if isinstance(parsed, tuple):
        return parsed[:9]
    return parsed
758
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000759
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With no zone
    information the fields are interpreted as local time; otherwise
    they are interpreted in the given zone.  The result is a float
    number of seconds since the epoch.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Convert the broken-down time as if it were UTC, then remove the
    # zone offset.  Unlike mktime(), this does not depend on the
    # local zone or DST rules of the machine running the code.
    import calendar
    return float(calendar.timegm(data[:8] + (0,)) - data[9])
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000768
Guido van Rossumb6775db1994-08-01 11:34:53 +0000769
770# When used as script, run a small test program.
771# The first command line argument must be a filename containing one
772# message in RFC-822 format.
773
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    # Each print() is given a single pre-formatted string so that the
    # output is the same whether print is a statement or a function.
    import sys, os
    file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]: file = sys.argv[1]
    f = open(file, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            parts = ['ParsedDate:', time.asctime(date[:-1])]
            hhmmss = date[-1]
            # The zone offset is None when the Date header has no
            # usable zone; in that case only the date itself is shown.
            if hhmmss is not None:
                hhmm, ss = divmod(hhmmss, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%+03d%02d" % (hh, mm))
                if ss: parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        # Count the lines of the body.
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %s' % (n,))
        print('-'*70)
        print('len = %s' % (len(m),))
        if m.has_key('Date'): print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'): pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
806 print 'items =', m.items()