blob: 9d76f6d8cdc975643f878c88835ae06498c25984 [file] [log] [blame]
"""RFC-822 message manipulation class.

XXX This is only a very rough sketch of a full RFC-822 parser;
in particular the tokenizing of addresses does not adhere to all the
quoting rules.

Directions for use:

To create a Message object: first open a file, e.g.:
    fp = open(file, 'r')
(or use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen()).
Then pass the open file object to the Message() constructor:
    m = Message(fp)

To get the text of a particular header there are several methods:
    str = m.getheader(name)
    str = m.getrawheader(name)
where name is the name of the header, e.g. 'Subject'.
The difference is that getheader() strips the leading and trailing
whitespace, while getrawheader() doesn't.  Both functions retain
embedded whitespace (including newlines) exactly as they are
specified in the header, and leave the case of the text unchanged.

For addresses and address lists there are functions
    realname, mailaddress = m.getaddr(name) and
    list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.

There is also a method
    time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000040
import calendar
import string
import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000043
44
_blanklines = ('\r\n', '\n')  # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from an open file object when the instance is
    created.  Afterwards the following attributes are available:

    fp             -- the file object the message was read from
    seekable       -- true if fp supports tell()/seek()
    startofheaders -- file position before the headers (or None)
    startofbody    -- file position after the headers (or None)
    headers        -- list of raw header lines, exactly as read
    dict           -- maps lowercased header names to stripped values
                      (only the *last* occurrence of each header)
    unixfrom       -- the Unix 'From ' envelope line(s), if any
    status         -- '' if the headers parsed cleanly, else a message
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is an open file object positioned at the start of the
        message.  `seekable' should be false when fp does not support
        tell() and seek(); it is also cleared automatically when
        tell() raises IOError.
        """
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError when the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ''
        firstline = 1
        startofline = None
        while 1:
            # Remember where this line starts so that a bad line can be
            # "unread" with an absolute seek.  A relative seek by
            # -len(line) is wrong when newline translation is active
            # and is not supported at all by text-mode streams.
            if self.seekable:
                try:
                    startofline = self.fp.tell()
                except IOError:
                    startofline = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if self.islast(line):
                break
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                folded = self.dict[headerseen] + '\n ' + line.strip()
                self.dict[headerseen] = folded.strip()
            elif ':' in line:
                # It's a header line.
                lines.append(line)
                i = line.find(':')
                headerseen = line[:i].lower()
                self.dict[headerseen] = line[i+1:].strip()
            else:
                # It's not a header line; stop here.
                if not headerseen:
                    self.status = 'No headers'
                else:
                    self.status = 'Bad header'
                # Try to undo the read.
                if startofline is not None:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A new, different header starts here.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                # Stop at the first line that is not a continuation.
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == prefix:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "name:" from the first line; keep everything else verbatim.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or None if it doesn't exist.  This uses the dictionary
        version which finds the *last* such header.
        """
        return self.dict.get(name.lower())

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header is missing or holds no
        address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  An empty list
        is returned when the header does not occur.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        return AddrlistClass(data).getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None when the
        header is missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None when the header is
        missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError when the header does not occur.
        """
        return self.dict[name.lower()]

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        prefix = name + ':'
        n = len(prefix)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if not hit:
                kept.append(line)
        # Update in place so other references to the list stay valid.
        self.headers[:] = kept

    def __contains__(self, name):
        """Support `name in message' (case-insensitive)."""
        return name.lower() in self.dict

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names (as a list)."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values (as a list)."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())
Guido van Rossum01ca3361992-07-13 14:28:59 +0000327
328
329
# Utility functions
# -----------------

# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.
335
Guido van Rossum01ca3361992-07-13 14:28:59 +0000336
def unquote(str):
    """Strip one layer of surrounding "..." or <...> from a string.

    Strings of length 0 or 1, and strings that are not wrapped in a
    matching pair of delimiters, are returned unchanged.
    """
    if len(str) <= 1:
        return str
    delimiters = (str[0], str[-1])
    if delimiters == ('"', '"') or delimiters == ('<', '>'):
        return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000345
346
def quote(str):
    """Add quotes around a string.

    Backslashes and double quotes inside the string are escaped with a
    backslash, and the result is wrapped in double quotes.
    """
    # Backslashes must be doubled first, otherwise the backslashes
    # added while escaping the quotes would be escaped again.
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000356
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000357
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when no
    address can be parsed the result is (None, None).
    """
    addresses = AddrlistClass(address).getaddrlist()
    if addresses:
        return addresses[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000366
367
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser is a simple cursor over self.field: self.pos is the
    index of the next unread character, and self.commentlist collects
    the text of the comments seen while parsing the current address.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace (including line breaks) and collects any
        comments into self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (realname, mailaddr) tuples.  Parsing stops
        at the first position where no address can be extracted.
        """
        # Iterative rather than recursive, so that very long address
        # lists cannot exhaust the interpreter's recursion limit.
        result = []
        while 1:
            ad = self.getaddress()
            if not ad:
                break
            result.extend(ad)
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one element for
        a plain address, any number for a group, and an empty list
        when nothing could be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a snapshot (not an alias) so that restoring it below
        # really discards the comments collected by getphraselist();
        # otherwise they are collected twice when we start over.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos >= len(self.field):
                    # Unterminated group: ran off the end of the field.
                    break
                if self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                realname = (' '.join(plist) + ' (' +
                            ' '.join(self.commentlist) + ')')
            else:
                realname = ' '.join(plist)
            returnlist = [(realname, routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special character; without this the
                # group loop above would never make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the
        addrspec.  Returns None if the parser is not positioned at a
        '<' or if no addrspec is found.
        """
        if self.field[self.pos] != '<':
            return None

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Consume (and ignore) the domain of a route hop.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the closing '>'.
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec (local-part, optionally @domain)."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The delimiters themselves are not part of the returned string,
        and a backslash makes the following character literal.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = []
        escaped = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if escaped:
                slist.append(ch)
                escaped = 0
            elif ch in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and ch == '(':
                slist.append(self.getcomment())
                # getcomment() has already advanced past the nested
                # comment; advancing again would drop a character.
                continue
            elif ch == '\\':
                escaped = 1
            else:
                slist.append(ch)
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        in self.atomends and returns them as a string.
        """
        start = self.pos
        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            self.pos = self.pos + 1

        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Returns the list of words;
        comments encountered are added to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000620
621
# Parse a date field

_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
               'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
              'AST': -400, 'ADT': -300,  # Atlantic
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine elements form a tuple that can be passed to
    time.mktime() (weekday, day-of-year and DST flag are always 0).
    The tenth element is the time zone offset from UTC in seconds,
    or None when the zone is missing or not recognized.  Named zones
    are looked up in _timezones; apart from Z, military zones are not
    supported.

    Returns None when the string cannot be parsed as a date.
    """
    data = data.split()
    if not data:
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "16:17:39+0100".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    if mm not in _monthnames:
        # Probably "Mon dd hh:mm:ss yyyy" order; swap the fields round.
        dd, mm, yy, tm = mm, dd, tm, yy
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the result an integer.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
705
Guido van Rossumb6775db1994-08-01 11:34:53 +0000706
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is parsedate_tz() with the trailing time zone offset
    dropped; any non-tuple result is passed through unchanged.
    """
    result = parsedate_tz(data)
    if type(result) is not tuple:
        return result
    return result[:9]
713
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000714
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the time zone offset in seconds east of UTC, or None.
    When it is None the first eight fields are interpreted as local
    time.  Otherwise they are interpreted as UTC and the offset is
    subtracted, so the result does not depend on the local time zone
    or its daylight-saving rules.

    Returns the timestamp as a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # calendar.timegm() only looks at the first six fields.
    return float(calendar.timegm(tuple(data[:6])) - data[9])
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000730
Guido van Rossumb6775db1994-08-01 11:34:53 +0000731
# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.

if __name__ == '__main__':
    import os
    import sys

    # Only fall back to the default mailbox (which needs a home
    # directory) when no file name was given on the command line.
    if sys.argv[1:]:
        file = sys.argv[1]
    else:
        file = os.path.join(os.path.expanduser('~'), 'Mail/inbox/1')

    with open(file, 'r') as f:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            parts = ['ParsedDate:', time.asctime(date[:-1])]
            tzoffset = date[-1]
            if tzoffset is not None:
                # Format sign and magnitude separately: divmod() on a
                # negative offset floors towards minus infinity and
                # would print e.g. -0530 as -0630.
                if tzoffset < 0:
                    sign = '-'
                else:
                    sign = '+'
                hhmm, ss = divmod(abs(tzoffset), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    parts.append('.%02d' % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-' * 70)
        print('len = %d' % len(m))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))