blob: 89392f6161fe948c29cdbee24c9b9ecc1e95bbad [file] [log] [blame]
"""RFC-822 message manipulation class.

XXX This is only a very rough sketch of a full RFC-822 parser;
in particular the tokenizing of addresses does not adhere to all the
quoting rules.

Directions for use:

To create a Message object: first open a file, e.g.:
  fp = open(file, 'r')
(or use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen()).
Then pass the open file object to the Message() constructor:
  m = Message(fp)

To get the text of a particular header there are several methods:
  str = m.getheader(name)
  str = m.getrawheader(name)
where name is the name of the header, e.g. 'Subject'.
The difference is that getheader() strips the leading and trailing
whitespace, while getrawheader() doesn't.  Both functions retain
embedded whitespace (including newlines) exactly as they are
specified in the header, and leave the case of the text unchanged.

For addresses and address lists there are functions
  realname, mailaddress = m.getaddr(name) and
  list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.

There is also a method
  time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000040
Guido van Rossum9694fca1997-10-22 21:00:49 +000041import re
Guido van Rossum01ca3361992-07-13 14:28:59 +000042import string
Guido van Rossumb6775db1994-08-01 11:34:53 +000043import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000044
45
# The two spellings of an empty line that end the header section.  Kept
# as a module-level tuple so that Message.islast() can test membership
# without rebuilding it on every call.
_blanklines = ('\r\n', '\n')
Guido van Rossum92457b91995-06-22 19:06:57 +000047
48
class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from an open file object when the instance is
    created; afterwards the file is positioned at the start of the body.

    Attributes set by the constructor and by readheaders():

    fp             -- the file object the message is read from
    seekable       -- true while fp is believed to support tell()/seek()
    startofheaders -- file offset of the first header, or None
    startofbody    -- file offset of the body, or None
    headers        -- list of the raw header lines, exactly as read
    dict           -- maps lowercased header names to stripped values
                      (only the *last* occurrence of each header)
    unixfrom       -- leading Unix "From " envelope line(s), or ''
    status         -- '' if parsing went well, else an error message
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        `fp' is an open file object positioned at the start of the
        message.  `seekable' should be false if fp does not support
        tell() and seek(); it is also cleared automatically when
        tell() raises IOError.
        """
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        while 1:
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines.  firstline is deliberately
            # left set so that several consecutive envelope lines are
            # all collected.
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if self.islast(line):
                break
            elif headerseen and line[0] in ' \t':
                # It's a continuation line: fold it into the value of
                # the header seen most recently.
                lines.append(line)
                folded = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = folded.strip()
            elif ':' in line:
                # It's a header line.
                lines.append(line)
                i = line.find(':')
                headerseen = line[:i].lower()
                self.dict[headerseen] = line[i+1:].strip()
            else:
                # It's not a header line; stop here.
                if not headerseen:
                    self.status = 'No headers'
                else:
                    self.status = 'Bad header'
                # Try to undo the read.
                if self.seekable:
                    self.fp.seek(-len(line), 1)
                else:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A new, different header starts here.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                # Past the first match: stop at the next line that is
                # not a continuation line.
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop the header name and the colon from the first line.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or None if it doesn't exist.  This uses the dictionary
        version which finds the *last* such header.
        """
        return self.dict.get(name.lower())

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header is missing or contains
        no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  Returns an
        empty list if the header does not occur.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime().  Returns None if the
        header does not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header does
        not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header does not occur.
        """
        return self.dict[name.lower()]

    def __contains__(self, name):
        """Support `name in message' (case-insensitive)."""
        return name.lower() in self.dict

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()
Guido van Rossum01ca3361992-07-13 14:28:59 +0000306
307
308
309# Utility functions
310# -----------------
311
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000312# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000313# XXX The inverses of the parse functions may also be useful.
314
Guido van Rossum01ca3361992-07-13 14:28:59 +0000315
def unquote(str):
    """Strip one enclosing pair of "..." or <...> from a string.

    Strings shorter than two characters, and strings that are not
    wrapped in a matching pair, are returned unchanged.
    """
    if len(str) <= 1:
        return str
    ends = (str[0], str[-1])
    if ends == ('"', '"') or ends == ('<', '>'):
        return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000324
325
def quote(str):
    """Add quotes around a string.

    Backslashes and double quotes inside the string are escaped with a
    backslash, and the result is wrapped in double quotes.
    """
    # Backslashes must be doubled first, otherwise the backslashes
    # added while escaping the quotes would themselves get doubled.
    return '"%s"' % str.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000335
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000336
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none
    can be parsed the result is (None, None).
    """
    addresses = AddrlistClass(address).getaddrlist()
    if addresses:
        return addresses[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000345
346
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes text at the cursor and
    advances it.  Comments met along the way are collected in
    self.commentlist.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips white space and newlines, and collects any comments.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where no address can be found.
        """
        # Iterate rather than recurse so that very long address lists
        # cannot exhaust the interpreter's recursion limit.
        result = []
        ad = self.getaddress()
        while ad:
            result.extend(ad)
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: normally one
        element, several for a group, and empty if nothing was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: the list object itself is appended to while the
        # phrase is scanned, so an alias could not be used to restore
        # the earlier state and comments would be collected twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []
        fieldlen = len(self.field)

        if self.pos >= fieldlen:
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            self.pos = self.pos + 1
            while self.pos < fieldlen:
                self.gotonext()
                # gotonext() may have run off the end of the field.
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special character; otherwise the
                # group loop above would call us forever without the
                # cursor ever moving.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < fieldlen and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the
        addrspec.  Returns None if the cursor is not at a '<', and the
        empty string if the angle brackets contain no addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        # Default for inputs such as '<>' or a trailing '<', where the
        # loop below never reaches an addrspec.
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Skip the character after the addrspec (normally '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part.
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The returned text excludes the delimiters; a backslash makes
        the following character literal and is itself dropped.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one
                # literally, whatever it is.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        listed in self.atomends, or to the end of the field.
        """
        start = self.pos
        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            self.pos = self.pos + 1

        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Returns the list of words;
        comments met on the way go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000598
599
600# Parse a date field
601
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
               'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Offsets are written the way they appear in a header: +/-HHMM as a
# decimal number (so -500 means five hours west of UTC).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine elements form a tuple that can be passed to
    time.mktime() (weekday, day of year and DST flag are always 0);
    the tenth is the offset of the date's time zone from UTC in
    seconds (e.g. -18000 for -0500).  Numeric zones and the names in
    _timezones are understood; of the military zones only Z is.  An
    unrecognised or missing zone yields an offset of 0.

    Returns None if the string cannot be parsed.
    """
    data = data.split()
    if not data:
        # Empty or all-blank field.
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("12:00:00+0100"), or
        # missing altogether.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if mm not in _monthnames:
        # Maybe it is "Mon DD HH:MM:SS YYYY" order instead.
        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        # Not HH:MM or HH:MM:SS.
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = _timezones.get(tz.upper())
    if tzoffset is None:
        try:
            tzoffset = int(tz)
        except ValueError:
            tzoffset = 0
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset < 0:
        tzsign = -1
    else:
        tzsign = 1
    # divmod() on the absolute value keeps this an integer computation
    # regardless of how the interpreter defines the / operator.
    hours, minutes = divmod(tzoffset * tzsign, 100)
    tzoffset = tzsign * (hours*3600 + minutes*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
680
Guido van Rossumb6775db1994-08-01 11:34:53 +0000681
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but with the trailing time zone offset
    dropped, leaving a 9-tuple; any non-tuple result of
    parsedate_tz() (i.e. None) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
688
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000689
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the zone offset in seconds east of UTC (negative for
    zones west of UTC), as produced by parsedate_tz().

    Minor glitch: this first interprets the first 8 elements as a
    local time and then compensates for the timezone difference;
    this may yield a slight error around daylight savings time
    switch dates.  Not enough to worry about for common use.

    """
    # Treat the fields as local standard time (DST flag forced to 0) ...
    t = time.mktime(data[:8] + (0,))
    # ... then undo the local zone (time.timezone is seconds *west* of
    # UTC) and apply the message's own zone.  A zone east of UTC is
    # ahead of UTC, so its offset has to be subtracted to get back to
    # UTC.
    return t - data[9] - time.timezone
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000701
Guido van Rossumb6775db1994-08-01 11:34:53 +0000702
703# When used as script, run a small test program.
704# The first command line argument must be a filename containing one
705# message in RFC-822 format.
706
if __name__ == '__main__':
    # Small self-test: parse one message and dump what was found.
    # Every print() call below takes exactly one pre-formatted string,
    # so the output is the same whether print is a statement or a
    # function.
    import sys
    import os
    path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        path = sys.argv[1]
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            parts = ['ParsedDate:', time.asctime(date[:-1])]
            # Split the magnitude of the offset, not the signed value:
            # divmod() rounds towards minus infinity, which would turn
            # -0530 into -0630.
            offset = date[-1]
            sign = '+'
            if offset < 0:
                sign = '-'
                offset = -offset
            hhmm, ss = divmod(offset, 60)
            hh, mm = divmod(hhmm, 60)
            parts.append("%s%02d%02d" % (sign, hh, mm))
            if ss:
                parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        # Count the lines of the body.
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        # Release the file even if parsing or printing fails.
        f.close()