blob: e6e0696d406f3aae970c59937ddb78aa982caa16 [file] [log] [blame]
"""RFC-822 message manipulation class.

XXX This is only a very rough sketch of a full RFC-822 parser;
in particular the tokenizing of addresses does not adhere to all the
quoting rules.

Directions for use:

To create a Message object: first open a file, e.g.:
  fp = open(file, 'r')
(or use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen()).
Then pass the open file object to the Message() constructor:
  m = Message(fp)

To get the text of a particular header there are several methods:
  str = m.getheader(name)
  str = m.getrawheader(name)
where name is the name of the header, e.g. 'Subject'.
The difference is that getheader() strips the leading and trailing
whitespace, while getrawheader() doesn't.  Both functions retain
embedded whitespace (including newlines) exactly as they are
specified in the header, and leave the case of the text unchanged.

For addresses and address lists there are functions
  realname, mailaddress = m.getaddr(name) and
  list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.

There is also a method
  time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000040
import calendar
import string
import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000043
44
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    Instance attributes set by the constructor / readheaders():
      fp              -- the file object the message is read from
      seekable        -- true while tell()/seek() on fp are believed to work
      startofheaders  -- fp.tell() before the headers, or None
      startofbody     -- fp.tell() after the headers, or None
      headers         -- list of raw header lines, exactly as read
      dict            -- maps lowercased header name to stripped value
                         (the last occurrence of each name wins)
      unixfrom        -- leading Unix 'From ' line(s), or ''
      status          -- '' if the headers were read cleanly, else an
                         error message
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is an open file object positioned at the start of the
        message.  Pass a false `seekable' if fp does not support
        tell() and seek(); the flag is also cleared automatically
        when tell() raises IOError.
        """
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        while 1:
            # Remember where this line starts so that a non-header line
            # can be "unread" with an absolute seek.  A relative seek by
            # -len(line) is wrong when the stream translates newlines and
            # is not supported at all by some text streams.
            startofline = None
            if self.seekable:
                try:
                    startofline = self.fp.tell()
                except IOError:
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if self.islast(line):
                break
            elif headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = x.strip()
            elif ':' in line:
                # It's a header line.
                lines.append(line)
                i = line.find(':')
                headerseen = line[:i].lower()
                self.dict[headerseen] = line[i+1:].strip()
            else:
                # It's not a header line; stop here.
                if not headerseen:
                    self.status = 'No headers'
                else:
                    self.status = 'Bad header'
                # Try to undo the read.
                if startofline is not None:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A different header starts here; continuation lines
                # (leading whitespace) keep the current hit state.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line; the rest is kept verbatim.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or None if it doesn't exist.  This uses the dictionary
        version which finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return None

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header is missing or yields
        no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  Returns an
        empty list if the header does not occur.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header does not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header does
        not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header does not occur.
        """
        return self.dict[name.lower()]

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()
Guido van Rossum01ca3361992-07-13 14:28:59 +0000305
306
307
# Utility functions
# -----------------

# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.
313
Guido van Rossum01ca3361992-07-13 14:28:59 +0000314
def unquote(str):
    """Remove quotes from a string.

    If `str' is at least two characters long and is wrapped in a
    matching pair of double quotes ("...") or angle brackets (<...>),
    return it without that outer pair.  Otherwise return it unchanged.
    No backslash escapes are interpreted.
    """
    # The length guard keeps a lone '"' or '<' from matching itself
    # as both the opening and the closing delimiter.
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1]
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000323
324
def quote(str):
    """Add quotes around a string.

    Backslashes and double quotes inside `str' are escaped with a
    backslash, and the result is wrapped in double quotes.
    """
    # Backslashes must be doubled first; otherwise the backslashes
    # inserted in front of the quotes would themselves get doubled.
    escaped = str.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000334
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000335
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned.
    Returns (None, None) if no address could be parsed.
    """
    addresses = AddrlistClass(address).getaddrlist()
    if not addresses:
        return (None, None)
    return addresses[0]
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000344
345
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes text starting at the
    cursor and leaves the cursor just past what it consumed.
    Comments met along the way are collected in self.commentlist.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace (including CR and LF) and comments; the
        text of each skipped comment is appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where getaddress() yields nothing.
        """
        # Iterative rather than recursive, so that a long address list
        # cannot exhaust the interpreter's recursion limit.
        result = []
        while 1:
            ad = self.getaddress()
            if not ad:
                break
            result.extend(ad)
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for
        a single address, one entry per member for a group, or an
        empty list if no address was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in
        # place, so a plain reference could not be used to restore the
        # earlier state (comments would be collected twice).
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                # gotonext() may have run off the end of the field
                # (unterminated group), so re-check the bound.
                if (self.pos < len(self.field) and
                        self.field[self.pos] == ';'):
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special character; without this the
                # cursor would not move and the group loop above could
                # spin forever on input such as 'a: >'.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not at a '<' or if no addrspec
        is found before the closing '>'.
        """
        if self.field[self.pos] != '<':
            return None

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route ("@domain"); parsed and dropped.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec
                # (the closing '>' in well-formed input).
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part, followed by '@' and the domain when
        a domain is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address.

        Whitespace is skipped and comments are moved to
        self.commentlist; neither appears in the returned string.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The returned text excludes the delimiters; a backslash is
        dropped and the character after it is taken literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if escaped == 1:
                # Previous character was a backslash: take this one as is.
                slist.append(self.field[self.pos])
                escaped = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                # NOTE: getcomment() leaves the cursor past the nested
                # comment, and the increment below then skips one more
                # character (behavior kept as in the original).
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                escaped = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        found in self.atomends and returns them as a string.
        """
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Returns the list of words;
        comments met on the way go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000598
599
# Parse a date field
601
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
               'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    Returns (year, month, day, hour, minute, second, 0, 0, 0, tzoffset)
    or None if the string cannot be parsed.  tzoffset is the zone's
    offset from UTC in seconds (e.g. -18000 for -0500), or None when
    no usable zone is given.  Zone names are looked up in _timezones
    (which holds no military zones other than Z); anything else is
    read as a numeric +HHMM / -HHMM offset.  The year is returned as
    written (a two-digit year is not expanded).
    """
    data = data.split()
    if not data:
        # Empty or all-whitespace input.
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("19:12:08+0100"); split it
        # off, keeping the sign so that negative offsets work too.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not mm in _monthnames:
        # Try the "Mon DD HH:MM:SS YYYY" field order instead.
        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the result an integer on every
        # Python version.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
683
Guido van Rossumb6775db1994-08-01 11:34:53 +0000684
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine elements of the parsedate_tz() result
    (i.e. without the timezone offset), or whatever non-tuple value
    parsedate_tz() returned (None for an unparsable string).
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
691
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000692
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data' must be a tuple whose 10th element is the zone offset from
    UTC in seconds, or None.  With an offset, the date/time fields are
    converted with calendar.timegm(), so the result does not depend on
    the local timezone of the machine.  Without one, the fields are
    interpreted as local time via time.mktime().
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # timegm() treats the fields as UTC; subtracting the offset then
    # gives the true UTC instant.  float() keeps the return type the
    # same as in the no-zone branch.
    return float(calendar.timegm(data[:6])) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000708
Guido van Rossumb6775db1994-08-01 11:34:53 +0000709
# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.
713
if __name__ == '__main__':
    import os
    import sys
    # Default to the first message of an MH-style inbox; a filename
    # given on the command line overrides it.
    path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        path = sys.argv[1]
    f = open(path, 'r')
    try:
        m = Message(f)
        # Each line is formatted into one string so the output is the
        # same whether print is a statement or a function.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            # NOTE(review): the parsed tuple carries 0 as day-of-year;
            # some interpreters' asctime() appear to reject that, so 1
            # is substituted here (asctime does not print it) -- confirm.
            parts = ['ParsedDate:', time.asctime(date[:7] + (1, date[8]))]
            offset = date[-1]
            if offset is not None:
                # Format the sign separately: divmod() on a negative
                # offset rounds toward minus infinity and would print
                # e.g. -0630 for an offset of -5h30m.
                sign = '+'
                if offset < 0:
                    sign = '-'
                    offset = -offset
                hhmm, ss = divmod(offset, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    parts.append('.%02d' % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
746 print 'items =', m.items()