blob: ef7ab433ffb39733a21bf2479c3048d4cdcd224b [file] [log] [blame]
Guido van Rossumaad67612000-05-08 17:31:04 +00001"""RFC-822 message manipulation class.
2
3XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
11You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
13Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
16This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
23
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
28that initial tell() call when passing in an unseekable object such as
29a file object created from a socket object. If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
31called once; if this raises an exception, seekable is reset to 0. For
32other nonzero values of seekable, this test is not made.
33
34To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
58# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
59
60import string
61import time
62
63
_blanklines = ('\r\n', '\n')    # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The constructor reads the header block from `fp' and leaves the
    file positioned at the start of the body.  After construction the
    following attributes are available:

    fp             -- the input object the message was read from
    seekable       -- true if tell()/seek() may be used on fp
    startofheaders -- fp.tell() before the headers (None if unseekable)
    startofbody    -- fp.tell() after the headers (None if unseekable)
    headers        -- list of raw header lines, in file order
    dict           -- maps lowercased header name to the stripped value
                      of the *last* header of that name
    unixfrom       -- any leading Unix "From " line(s), else ''
    status         -- '' if parsing went well, otherwise an error text
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is
        1 (the default) tell() is tried once and seekable is reset to 0
        when that fails; other values are taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                # Only "no tell method" and "tell failed" mean unseekable;
                # anything else (e.g. KeyboardInterrupt) must propagate.
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        # Per-name caches: raw lines (for getallmatchingheaders) and
        # stripped values (for getheaders), keyed by canonical name.
        self.__gamh_cache = {}
        self.__gh_cache = {}
        self.unixfrom = ''
        self.headers = rawlines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick the push-back strategy: prefer fp.unread(), else seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                startofline = tell()
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                rawlines.append(line)
                self.__gamh_cache[headerseen].append(line)
                value = ("%s\n %s" % (self.dict[headerseen],
                                      line.strip())).lstrip()
                self.dict[headerseen] = value
                self.__gh_cache[headerseen][-1] = value
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                rawlines.append(line)
                self.__gamh_cache.setdefault(headerseen, []).append(line)
                value = line[len(headerseen)+1:].strip()
                self.dict[headerseen] = value
                self.__gh_cache.setdefault(headerseen, []).append(value)
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        found = self.__gamh_cache.get(name.lower())
        if found:
            # Hand out a copy so callers cannot corrupt the cache.
            return found[:]
        return []

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        found = self.__gamh_cache.get(name.lower())
        if not found:
            return []
        first = []
        for item in found:
            # A non-continuation line after the first one starts the
            # next occurrence of the header.
            if first and item[0] not in " \t":
                break
            first.append(item)
        return first

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "name:" from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        values = self.__gh_cache.get(name.lower())
        if values:
            return values[:]
        return []

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                # Continuation line: part of the current header's value.
                raw.append(h)
            else:
                # New occurrence: separate it from the previous one.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        key = name.lower()
        self.dict[key] = value
        text = name + ": " + value
        newlines = [part + "\n" for part in text.split("\n")]
        self.headers.extend(newlines)
        # Keep the lookup caches in step with self.dict/self.headers so
        # getheaders() and getallmatchingheaders() see the new value.
        self.__gamh_cache[key] = newlines
        self.__gh_cache[key] = [value]

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        # Drop cached lines/values too, or lookups would return stale data.
        for cache in (self.__gamh_cache, self.__gh_cache):
            if name in cache:
                del cache[name]
        prefix = name + ':'
        n = len(prefix)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A different header starts here; continuation lines
                # (leading whitespace) inherit the previous verdict.
                hit = 0
            if not hit:
                kept.append(line)
        # Mutate in place so other references to the list stay valid.
        self.headers[:] = kept

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines concatenated into one string."""
        return ''.join(self.headers)
439
440
441# Utility functions
442# -----------------
443
444# XXX Should fix unquote() and quote() to be really conformant.
445# XXX The inverses of the parse functions may also be useful.
446
447
def unquote(str):
    """Strip one enclosing pair of double quotes or angle brackets.

    Strings shorter than two characters, or not wrapped in a matching
    "..." or <...> pair, are returned unchanged.
    """
    if len(str) > 1:
        ends = (str[0], str[-1:])
        if ends == ('"', '"') or ends == ('<', '>'):
            return str[1:-1]
    return str
456
457
def quote(str):
    """Return str wrapped in double quotes.

    Backslashes are doubled first, then embedded double quotes are
    backslash-escaped, so the result can be embedded in a header.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
467
468
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none can
    be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
477
478
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field) and collects the RFC-822 comments it skips in
    self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments met on the way are
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, as
        (realname, mailaddr) tuples.  Parsing stops at the first
        position where getaddress() yields nothing.
        """
        # Iterative on purpose: one recursion level per address would
        # overflow the interpreter stack on long recipient lists.
        result = []
        ad = self.getaddress()
        while ad:
            result = result + ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a
        plain address, several for a group, empty if nothing was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far, so that the
        # restart below does not record the same comment twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it to make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on '<' or no addr-spec is found.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Domain of an "@domain" route element: parse and discard.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addr-spec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part, followed by '@' and the domain when an
        '@' is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0       # true right after a backslash
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Backslash-quoted character: take it literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom."""
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  The words are returned as a
        list; comments met on the way go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else: plist.append(self.getatom())

        return plist
736
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of addresses held."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, joined by ', '."""
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        """Set union: self's addresses plus other's that self lacks."""
        union = AddressList(None)
        extra = [x for x in other.addresslist if x not in self.addresslist]
        union.addresslist = self.addresslist[:] + extra
        return union

    def __sub__(self, other):
        """Set difference: self's addresses that are not in other."""
        diff = AddressList(None)
        diff.addresslist = [x for x in self.addresslist
                            if x not in other.addresslist]
        return diff

    def __getitem__(self, index):
        """Index into the address list.

        Make indexing, slices, and 'in' work.
        """
        return self.addresslist[index]
772
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a real name the result is '"name" <address>'; with a false
    name (None or '') it is just the address.
    """
    realname = pair[0]
    address = pair[1]
    if not realname:
        return address
    return '"' + realname + '" <' + address + '>'
779
780# Parse a date field
781
# Abbreviated names first, full names second: an index above 12 is
# folded back onto 1..12 by parsedate_tz().
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are in the +-HHMM form used in date headers, not in seconds.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second,
    0, 0, 0, tzoffset) where tzoffset is the zone offset from UTC in
    seconds, or None if the zone is missing or not recognized.
    Returns None if the string cannot be parsed as a date.

    Zone names are looked up in _timezones; of the military zones
    only 'Z' is recognized.  Numeric +-HHMM zones are accepted.
    """
    data = data.split()
    if not data:
        # Empty or all-whitespace header value.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("08:49:37-0500").
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone so it converts correctly.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe "Month day" rather than "day Month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12: mm = mm - 12
    if dd[-1:] == ',':
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Year and time are swapped (ctime()-style ordering).
        yy, tm = tm, yy
    if yy[-1:] == ',':
        yy = yy[:-1]
    if not ('0' <= yy[:1] <= '9'):
        # Not a year: assume year and zone are swapped.
        yy, tz = tz, yy
    if tm[-1:] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps this integer arithmetic on every Python version.
        hours, minutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (hours*3600 + minutes*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
880
881
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but with the trailing timezone element
    dropped, leaving a 9-tuple; a non-tuple result (None on parse
    failure) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
888
889
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when unknown.
    """
    zone = data[9]
    fields = data[:8]
    if zone is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(fields + (-1,))
    # mktime() reads the fields as local standard time; remove the
    # sender's offset and then the local one.
    local = time.mktime(fields + (0,))
    return local - zone - time.timezone
898
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; the current
    time is used when it is None.  The result is always in GMT.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hh, mm, ss, wd = time.gmtime(timeval)[:7]
    # Day and month names are spelled out here rather than taken from
    # strftime('%a'/'%b'), which follows the process locale and would
    # emit non-English names that are invalid in a date header.
    dayname = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[wd]
    monthname = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        dayname, day, monthname, year, hh, mm, ss)
908
909
910# When used as script, run a small test program.
911# The first command line argument must be a filename containing one
912# message in RFC-822 format.
913
if __name__ == '__main__':
    import sys, os
    # Only fall back to the default mailbox when no file was given, so
    # a missing HOME variable cannot break an explicit invocation.
    if sys.argv[1:]:
        file = sys.argv[1]
    else:
        file = os.path.join(os.path.expanduser('~'), 'Mail/inbox/1')
    f = open(file, 'r')
    try:
        m = Message(f)
        # Single-argument print(...) behaves the same whether print is
        # a statement or a function.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            text = 'ParsedDate: ' + time.asctime(date[:-1])
            offset = date[-1]
            if offset is not None:
                # Split sign and magnitude first: divmod() on a negative
                # offset would round the hours the wrong way.
                sign = '+'
                if offset < 0:
                    sign = '-'
                    offset = -offset
                hhmm, ss = divmod(offset, 60)
                hh, mm = divmod(hhmm, 60)
                text = text + ' %s%02d%02d' % (sign, hh, mm)
                if ss:
                    text = text + ' .%02d' % ss
            print(text)
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()