blob: 967a85efd2d3956693dcd578f1874eac52967dfd [file] [log] [blame]
"""RFC-822 message manipulation class.

XXX This is only a very rough sketch of a full RFC-822 parser;
in particular the tokenizing of addresses does not adhere to all the
quoting rules.

Directions for use:

To create a Message object: first open a file, e.g.:
    fp = open(file, 'r')
(or use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen()).
Then pass the open file object to the Message() constructor:
    m = Message(fp)

To get the text of a particular header there are several methods:
    str = m.getheader(name)
    str = m.getrawheader(name)
where name is the name of the header, e.g. 'Subject'.
The difference is that getheader() strips the leading and trailing
whitespace, while getrawheader() doesn't.  Both functions retain
embedded whitespace (including newlines) exactly as they are
specified in the header, and leave the case of the text unchanged.

For addresses and address lists there are functions
    realname, mailaddress = m.getaddr(name) and
    list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.

There is also a method
    time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000040
Guido van Rossum01ca3361992-07-13 14:28:59 +000041import string
Guido van Rossumb6775db1994-08-01 11:34:53 +000042import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000043
44
# Line values that mark the end of the header section.  Kept in a tuple
# so that Message.islast() is a single membership test.
_blanklines = ('\r\n', '\n')


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from the file object as soon as the instance
    is created; the body is left unread on the file object.

    Attributes set by the constructor and by readheaders():
      fp             -- the file object the message is read from
      seekable       -- true if fp.tell() worked (cleared otherwise)
      startofheaders -- file position before the headers, or None
      startofbody    -- file position after the headers, or None
      headers        -- list of raw header lines, in file order
      dict           -- lower-cased header name -> stripped value
                        (the *last* occurrence wins)
      unixfrom       -- any leading unix 'From ' line(s)
      status         -- '' if all went well, else an error message
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is an open file-like object providing readline().  If
        `seekable' is true, fp.tell() is used to remember where the
        headers and the body start; if tell() raises IOError the
        instance silently falls back to non-seekable mode.
        """
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Remember where each line starts so that a bad line can be
        # un-read with an absolute seek.  A relative seek by len(line)
        # is wrong when the file layer translates newlines, and is not
        # supported at all by some text-mode file objects.
        tell = None
        startofline = None
        if self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if self.islast(line):
                break
            elif headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                joined = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = joined.strip()
            elif ':' in line:
                # It's a header line.
                lines.append(line)
                i = line.find(':')
                headerseen = line[:i].lower()
                self.dict[headerseen] = line[i+1:].strip()
            else:
                # It's not a header line; stop here.
                if not headerseen:
                    self.status = 'No headers'
                else:
                    self.status = 'Bad header'
                # Try to undo the read.
                if tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A different header starts here; stop collecting.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                # Already inside the match: only continuation lines
                # (starting with whitespace) still belong to it.
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop the "Name:" prefix from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or None if it doesn't exist.  This uses the dictionary
        version which finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return None

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header is missing or holds no
        address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  Returns an
        empty list if the header does not occur.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header does not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header does
        not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header does not occur.
        """
        return self.dict[name.lower()]

    def __contains__(self, name):
        """Support `name in message' (case-insensitive)."""
        return name.lower() in self.dict

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names (lower-cased)."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())
Guido van Rossum01ca3361992-07-13 14:28:59 +0000305
306
307
# Utility functions
# -----------------

# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.
313
Guido van Rossum01ca3361992-07-13 14:28:59 +0000314
def unquote(str):
    """Remove quotes from a string.

    A string enclosed in double quotes has the quotes removed and the
    backslash escapes for backslash and double quote undone, so that
    a string produced by quote() is restored to its original form.
    A string enclosed in angle brackets has just the brackets removed.
    Any other string (including one shorter than two characters) is
    returned unchanged.
    """
    if len(str) > 1:
        if str[0] == '"' and str[-1:] == '"':
            # Undo the backslash escapes first, then the quote escapes.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str[0] == '<' and str[-1:] == '>':
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000323
324
def quote(str):
    """Add quotes around a string.

    Backslashes and double quotes inside the string are escaped with
    a backslash, and the result is enclosed in double quotes.
    """
    # Backslashes must be escaped first, otherwise the backslashes
    # added for the double quotes would be doubled as well.
    escaped = str.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000334
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000335
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when no
    address can be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000344
345
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes text starting at the
    cursor and leaves the cursor just behind what it parsed.  Comments
    met along the way are collected in self.commentlist.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Characters that terminate an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips white space and line breaks, and collects any comments
        into self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where no address can be found.
        """
        # Iterative on purpose: one stack frame per address would
        # overflow the recursion limit on long recipient lists.
        result = []
        ad = self.getaddress()
        while ad:
            result = result + ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if no
        address was found, one element for a plain address, and any
        number of elements for an RFC-822 group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Copy, so that restoring it below really discards the comments
        # collected while scanning ahead.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos >= len(self.field):
                    # Unterminated group: ran off the end of the field.
                    break
                if self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                realname = (' '.join(plist) + ' (' +
                            ' '.join(self.commentlist) + ')')
            else:
                realname = ' '.join(plist)
            returnlist = [(realname, routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Unexpected special character: step over it so that
                # a caller looping on getaddress() always makes
                # progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the
        addrspec.  Returns None if the cursor is not at a '<', and the
        empty string if the angle brackets hold no addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        # Stays empty for "<>" or an unterminated "<".
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec
                # (the closing '>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part, followed by '@' and the domain when a
        domain is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The text between the delimiters is returned with backslash
        escapes resolved and without the delimiters themselves.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character following a backslash: taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the
                # nested comment; advancing again would drop a char.
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        listed in self.atomends.
        """
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Returns the list of words;
        comments met on the way go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000597
598
# Parse a date field

_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
               'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 0,
    0, tzoffset), or None if the string cannot be parsed.  tzoffset is
    the zone's offset from UTC in seconds (e.g. -18000 for -0500), or
    None if the zone is missing or not recognized.

    Zone names are looked up in _timezones, which holds UT/UTC/GMT/Z
    and the North American zones; anything else must be numeric.
    """
    data = data.split()
    if not data:
        # Empty or all-blank input.
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here.  Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, as in "12:00:00+0100".
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    if mm not in _monthnames:
        # Try the "Mon DD HH:MM:SS YYYY" ordering instead.
        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the result an integer.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
682
Guido van Rossumb6775db1994-08-01 11:34:53 +0000683
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz(), but the trailing time zone offset is
    dropped so that only the 9 time.mktime()-compatible elements
    remain.  A non-tuple result of parsedate_tz() is passed through
    unchanged.
    """
    result = parsedate_tz(data)
    if not isinstance(result, tuple):
        return result
    return result[:9]
690
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000691
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    Minor glitch: this first interprets the first 8 elements as a
    local time and then compensates for the timezone difference;
    this may yield a slight error around daylight savings time
    switch dates.  Not enough to worry about for common use.
    """
    zone_offset = data[9]
    fields = data[:8]
    if zone_offset is not None:
        # Read the fields as local standard time, then shift from the
        # local zone to the zone recorded in the tuple.
        local_stamp = time.mktime(fields + (0,))
        return local_stamp - zone_offset - time.timezone
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(fields + (-1,))
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000707
Guido van Rossumb6775db1994-08-01 11:34:53 +0000708
# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.  Without an argument the message is read
# from $HOME/Mail/inbox/1.

if __name__ == '__main__':
    import os
    import sys

    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        # Only consult $HOME when no file was named on the command line.
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        # Each print uses one pre-formatted string so the output is the
        # same whether print is a statement or a function.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            pieces = ['ParsedDate:', time.asctime(date[:-1])]
            offset = date[-1]
            if offset is not None:
                # Format the magnitude and the sign separately, so that
                # negative offsets with a minutes part come out right.
                sign = '+'
                if offset < 0:
                    sign = '-'
                    offset = -offset
                hhmm, ss = divmod(offset, 60)
                hh, mm = divmod(hhmm, 60)
                pieces.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    pieces.append('.%02d' % ss)
            print(' '.join(pieces))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %s' % (n,))
        print('-' * 70)
        print('len = %s' % (len(m),))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
745 print 'items =', m.items()