blob: 9631def4059ae537d453c018e59323aaebbfa127 [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossume894fc01998-06-11 13:58:40 +000016This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000023
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
that initial tell() call when passing in an unseekable object such as
a file object created from a socket object.  If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
31called once; if this raises an exception, seekable is reset to 0. For
32other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum01ca3361992-07-13 14:28:59 +000058
Guido van Rossum01ca3361992-07-13 14:28:59 +000059import string
Guido van Rossumb6775db1994-08-01 11:34:53 +000060import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000061
62
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from `fp' on construction and are available
    both as the raw list of lines (self.headers) and as a dictionary
    keyed by lower-cased header name (self.dict).  The dictionary only
    remembers the *last* occurrence of each header.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is
        exactly 1, fp.tell() is tried once and seekable is reset to 0
        when that fails; any other value is taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too).
            # Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            try:
                fp.tell()
            except Exception:
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        while 1:
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                folded = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = folded.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                # Skip only the colon; strip() removes any blank that
                # follows it.  (Skipping two characters would eat the
                # first character of the value in "Name:value".)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.  hasattr() is used because a
                # plain two-argument getattr() raises AttributeError
                # for file objects that have no `unread' method.
                if hasattr(self.fp, 'unread'):
                    self.fp.unread(line)
                elif self.seekable:
                    self.fp.seek(-len(line), 1)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \r\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A new, different header starts here.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    # First line after the match that is not a
                    # continuation: we are done.
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        matches = self.getfirstmatchingheader(name)
        if not matches:
            return None
        # Drop "name:" from the first line only.
        matches[0] = matches[0][len(name) + 1:]
        return ''.join(matches)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header is missing or holds no
        parsable address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  A missing
        header yields an empty list.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Any existing occurrences are removed first; the new header is
        appended to self.headers, one list entry per line of `value'.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if not hit:
                kept.append(line)
        # Update in place so other references to the list stay valid.
        self.headers[:] = kept

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000396
397
398# Utility functions
399# -----------------
400
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000401# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000402# XXX The inverses of the parse functions may also be useful.
403
Guido van Rossum01ca3361992-07-13 14:28:59 +0000404
def unquote(str):
    """Strip one pair of enclosing "..." or <...> from a string.

    Strings shorter than two characters, or not enclosed in a
    matching pair, are returned unchanged.
    """
    if len(str) > 1:
        first, last = str[0], str[-1:]
        for opener, closer in (('"', '"'), ('<', '>')):
            if first == opener and last == closer:
                return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000413
414
def quote(str):
    """Wrap a string in double quotes, escaping what needs it.

    Backslashes are doubled first, then embedded double quotes are
    backslash-escaped, and finally the result is enclosed in quotes.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000424
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000425
def parseaddr(address):
    """Return the first (realname, mailaddr) pair found in `address'.

    The string is handed to AddrlistClass; if no address can be
    parsed from it, (None, None) is returned.
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000434
435
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes input starting at the
    cursor and leaves the cursor just past what it parsed.  Comments
    met along the way are collected in self.commentlist.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace (including line ends) and comments; comment
        text is appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where getaddress() yields nothing.
        """
        # Iterative rather than recursive so that very long address
        # lists cannot hit the interpreter's recursion limit.
        result = []
        while 1:
            ad = self.getaddress()
            if not ad:
                break
            result.extend(ad)
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a
        plain address, several for a group, empty if nothing could be
        parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in
        # place, and the addr-spec branch below re-parses the same text.
        # Restoring an alias would report each comment twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                # gotonext() may have run off the end of the field
                # (e.g. "name: " followed only by blanks).
                if self.pos < len(self.field) and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it so that the
                # group loop above is guaranteed to make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on '<' or if no addr-spec is
        found before the closing '>'.
        """
        if self.field[self.pos] != '<':
            return None

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Domain of an "@domain" route element; discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                # End of the route part; the addr-spec follows.
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part, followed by '@' and the domain when a
        domain is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The delimiters themselves are not part of the returned text; a
        backslash makes the following character literal.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if quote == 1:
                slist.append(ch)
                quote = 0
            elif ch in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and ch == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved past the nested
                # comment; advancing again would drop a character.
                continue
            elif ch == '\\':
                quote = 1
            else:
                slist.append(ch)
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character
        listed in self.atomends.
        """
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else: plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000689
690
691# Parse a date field
692
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
               'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items are (year, month, day, hour, minute, second,
    0, 0, 0), usable with time.mktime(); the tenth is the zone offset
    from UTC in seconds, or None when no zone could be determined.
    Zone names are looked up in _timezones; anything else is tried as
    a numeric +-HHMM offset.  (Of the military zones only 'Z' is
    recognized.)

    Returns None if the string cannot be parsed.
    """
    data = data.split()
    if not data:
        # Empty or all-blank field: nothing to parse.
        return None
    if data[0][-1] == ',' or data[0] in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, as in "12:34:56+0100".
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not mm in _monthnames:
        # Try the "Mon dd hh:mm:ss yyyy" ordering instead.
        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        # Neither hh:mm nor hh:mm:ss.
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the hours part an integer.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
774
Guido van Rossumb6775db1994-08-01 11:34:53 +0000775
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing zone offset dropped;
    a non-tuple result from parsedate_tz() is passed through as is.
    """
    parsed = parsedate_tz(data)
    if type(parsed) != type(()):
        return parsed
    return parsed[:9]
782
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000783
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    fields = data[:8]
    offset = data[9]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT;
        # -1 lets mktime() decide about daylight saving time.
        return time.mktime(fields + (-1,))
    # Interpret the fields as local standard time, then correct for
    # both the message's zone and the local zone.
    local_stamp = time.mktime(fields + (0,))
    return local_stamp - offset - time.timezone
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000792
Guido van Rossumb6775db1994-08-01 11:34:53 +0000793
794# When used as script, run a small test program.
795# The first command line argument must be a filename containing one
796# message in RFC-822 format.
797
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    import sys, os
    # Default to the first message of an MH-style inbox under $HOME;
    # a filename given on the command line overrides it.
    file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]: file = sys.argv[1]
    f = open(file, 'r')
    m = Message(f)
    print 'From:', m.getaddr('from')
    print 'To:', m.getaddrlist('to')
    print 'Subject:', m.getheader('subject')
    print 'Date:', m.getheader('date')
    date = m.getdate_tz('date')
    if date:
        print 'ParsedDate:', time.asctime(date[:-1]),
        # The last tuple item is the zone offset in seconds; break it
        # down into hours, minutes and seconds for display.
        # NOTE(review): parsedate_tz() leaves the offset as None when no
        # zone was recognized; divmod() would then fail -- confirm.
        hhmmss = date[-1]
        hhmm, ss = divmod(hhmmss, 60)
        hh, mm = divmod(hhmm, 60)
        print "%+03d%02d" % (hh, mm),
        if ss: print ".%02d" % ss,
        print
    else:
        print 'ParsedDate:', None
    # Count the lines of the body.
    m.rewindbody()
    n = 0
    while f.readline():
        n = n + 1
    print 'Lines:', n
    print '-'*70
    # Exercise the dictionary-style interface.
    print 'len =', len(m)
    if m.has_key('Date'): print 'Date =', m['Date']
    if m.has_key('X-Nonsense'): pass
    print 'keys =', m.keys()
    print 'values =', m.values()
    print 'items =', m.items()