blob: d8de862cfa6deab1333c1046eab9f3ffac9597a6 [file] [log] [blame]
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001"""RFC-822 message manipulation class.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Guido van Rossum9ab94c11997-12-10 16:17:39 +00003XXX This is only a very rough sketch of a full RFC-822 parser;
4in particular the tokenizing of addresses does not adhere to all the
5quoting rules.
6
7Directions for use:
8
9To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
Guido van Rossumc7bb8571998-06-10 21:31:01 +000011You can use any other legal way of getting an open file object, e.g. use
12sys.stdin or call os.popen().
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013Then pass the open file object to the Message() constructor:
14 m = Message(fp)
15
Guido van Rossume894fc01998-06-11 13:58:40 +000016This class can work with any input object that supports a readline
17method. If the input object has seek and tell capability, the
18rewindbody method will work; also illegal lines will be pushed back
19onto the input stream. If the input object lacks seek but has an
20`unread' method that can push back a line of input, Message will use
21that to push back illegal lines. Thus this class can be used to parse
22messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000023
24The optional `seekable' argument is provided as a workaround for
25certain stdio libraries in which tell() discards buffered data before
26discovering that the lseek() system call doesn't work. For maximum
27portability, you should set the seekable argument to zero to prevent
that initial tell() when passing in an unseekable object such as
a file object created from a socket object.  If it is 1 on entry --
30which it is by default -- the tell() method of the open file object is
31called once; if this raises an exception, seekable is reset to 0. For
32other nonzero values of seekable, this test is not made.
33
Guido van Rossum9ab94c11997-12-10 16:17:39 +000034To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37where name is the name of the header, e.g. 'Subject'.
38The difference is that getheader() strips the leading and trailing
39whitespace, while getrawheader() doesn't. Both functions retain
40embedded whitespace (including newlines) exactly as they are
41specified in the header, and leave the case of the text unchanged.
42
43For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46where the latter returns a list of (realname, mailaddr) tuples.
47
48There is also a method
49 time = m.getdate(name)
50which parses a Date-like field and returns a time-compatible tuple,
51i.e. a tuple such as returned by time.localtime() or accepted by
52time.mktime().
53
54See the class definition for lower level access methods.
55
56There are also some utility functions here.
57"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000058# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000059
Guido van Rossum01ca3361992-07-13 14:28:59 +000060import string
Guido van Rossumb6775db1994-08-01 11:34:53 +000061import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000062
63
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from the file object at construction time and
    are then available in two forms:

    - self.headers: the raw, uninterpreted header lines (continuation
      lines included), in file order;
    - self.dict: a mapping from lower-cased header name to stripped
      value; when a header occurs more than once the *last* one wins.

    The instance also supports a read/write dictionary-like interface
    (len(), m[name], m[name] = value, del m[name], has_key(), keys(),
    values(), items()) backed by self.dict.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' says
        whether tell()/seek() may be used on it; when it is exactly 1
        (the default) this is verified by calling fp.tell() once.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too).  This is a pure
            # capability probe, so any ordinary failure just means
            # "not seekable"; KeyboardInterrupt and SystemExit are
            # deliberately not swallowed.
            try:
                fp.tell()
            except Exception:
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit push-back method; fall back to tell/seek.
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                startofline = tell()
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                # Skip the name and the colon only; strip() takes care
                # of any blank after the colon.  (Skipping two
                # characters would eat the first character of the
                # value in "Name:value".)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A different header starts here.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "name:" from the first line, keep everything after it.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header is missing or holds no
        parsable address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each
        address is a tuple as returned by getaddr().  Returns an
        empty list if the header does not occur.
        """
        # New, by Ben Escoto
        try:
            data = self[name]
        except KeyError:
            return []
        a = AddrlistClass(data)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the
        header does not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header does
        not occur.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header does not occur.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i in range(len(self.headers)):
            line = self.headers[i]
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so the remaining indices stay valid.
        doomed.reverse()
        for i in doomed:
            del self.headers[i]

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names (as a list)."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values (as a list)."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000409
410
411# Utility functions
412# -----------------
413
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000414# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000415# XXX The inverses of the parse functions may also be useful.
416
Guido van Rossum01ca3361992-07-13 14:28:59 +0000417
def unquote(str):
    """Strip one enclosing pair of double quotes or angle brackets.

    The string is returned unchanged when it is shorter than two
    characters or is not wrapped in a matching "..." or <...> pair.
    No backslash unescaping is performed.
    """
    if len(str) > 1:
        for opener, closer in (('"', '"'), ('<', '>')):
            if str[0] == opener and str[-1:] == closer:
                return str[1:-1]
    return str
Guido van Rossumb6775db1994-08-01 11:34:53 +0000426
427
def quote(str):
    """Return str wrapped in double quotes.

    Backslashes are doubled first, then embedded double quotes are
    backslash-escaped.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000437
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000438
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when no
    address can be parsed the result is (None, None).
    """
    addresses = AddrlistClass(address).getaddrlist()
    if addresses:
        return addresses[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000447
448
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (self.pos) into the header text
    (self.field); every get*() method consumes input starting at the
    cursor and leaves the cursor just past what it parsed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and newlines; parenthesized comments are
        consumed and collected in self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.  Parsing stops at the first
        position where no address can be read.
        """
        # Iterative rather than recursive so that very long address
        # lists cannot exhaust the interpreter's recursion limit.
        result = []
        ad = self.getaddress()
        while ad:
            result = result + ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for
        a plain address, several for a group, none if nothing could
        be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in
        # place, so an alias could not be used to roll it back.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                # gotonext() may have run off the end of the field
                # (e.g. "group: "), so re-check the bound.
                if self.pos < len(self.field) and \
                   self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # A stray special character: step over it so that a
                # caller looping on getaddress() (the group branch
                # above) is guaranteed to make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not at '<' or if no addrspec is
        found before the closing '>'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Skip the domain of an "@domain" route element.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                # End of the route; the addrspec follows.
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addrspec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns "local-part@domain", or just the local part if no
        '@' follows it.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append(self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        The delimiters themselves are not part of the result, and a
        backslash makes the following character literal.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one
                # literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom."""
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are
        appended to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000705
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (realname, mailaddr) tuples are kept in
    self.addresslist.  Instances support len(), indexing, str(), and
    the + and - operators.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        """Return the number of addresses held."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, comma-separated."""
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        """Return a new AddressList with self's addresses followed by
        those of `other' that self does not already contain."""
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __sub__(self, other):
        """Return a new AddressList with self's addresses that do not
        occur in `other'."""
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __getitem__(self, index):
        """Return the address (or slice of addresses) at `index'."""
        # Make indexing, slices, and 'in' work.  The list lives in
        # self.addresslist; there is no self.addrlist attribute.
        return self.addresslist[index]
741
def dump_address_pair(pair):
    """Format a (name, address) pair in canonical form.

    With a non-empty name the result is '"name" <address>';
    otherwise it is the bare address.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return '"' + name + '" <' + address + '>'
Guido van Rossumb6775db1994-08-01 11:34:53 +0000748
749# Parse a date field
750
# Abbreviated names first, full names second: an index of 12 or more
# therefore denotes a full name and must be folded back onto 1..12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second,
    0, 0, 0, tzoffset), or None if the string cannot be parsed.
    The year is returned exactly as written (two-digit years are not
    expanded).  tzoffset is the zone's offset from UTC in seconds
    (-0500 -> -18000), or None if the zone is missing or unknown.

    Zones may be numeric (+hhmm / -hhmm) or one of the names in
    _timezones; the RFC-822 military zones other than Z are not
    recognised.
    """
    data = data.split()
    if not data:
        # Empty or all-whitespace input.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time ("12:00:00+0100").
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Maybe it's "Month day" rather than "day Month".
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12:
        # Matched a full month name in the second half of the table.
        mm = mm - 12
    # Fields produced by the RFC 850 split above may be empty, so use
    # tests that are safe on ''.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Time and year are in the other order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy or yy[0] not in string.digits:
        # Year and zone are in the other order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # tzoffset is hhmm as a non-negative integer at this point.
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
848
Guido van Rossumb6775db1994-08-01 11:34:53 +0000849
def parsedate(data):
    """Convert a time string to a time tuple.

    This is parsedate_tz() without the trailing timezone offset: the
    first nine elements of its result are returned.  Anything else
    parsedate_tz() returns (i.e. None) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if isinstance(parsed, tuple):
        return parsed[:9]
    return parsed
856
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000857
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    offset = data[9]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Interpret the fields as local standard time, then correct for
    # the local zone and for the zone given in the tuple.
    local_stamp = time.mktime(data[:8] + (0,))
    return local_stamp - offset - time.timezone
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000866
Guido van Rossumb6775db1994-08-01 11:34:53 +0000867
868# When used as script, run a small test program.
869# The first command line argument must be a filename containing one
870# message in RFC-822 format.
871
if __name__ == '__main__':
    # Small self-test: parse one message and print what was found.
    # Every print() call gets a single pre-formatted string so the
    # output is the same whether print is a statement or a function.
    import sys
    import os
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        # Default to the first message of an MH-style inbox; the home
        # directory is only looked up when no file name was given.
        path = os.path.join(os.path.expanduser('~'), 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            text = 'ParsedDate: %s' % time.asctime(date[:-1])
            offset = date[-1]
            # The offset is None when the date carries no usable zone.
            if offset is not None:
                # Split off the sign first: divmod() on a negative
                # offset would round the hours away from zero
                # (-05:30 would come out as -06:30).
                if offset < 0:
                    sign = '-'
                    offset = -offset
                else:
                    sign = '+'
                hhmm, ss = divmod(offset, 60)
                hh, mm = divmod(hhmm, 60)
                text = text + ' %s%02d%02d' % (sign, hh, mm)
                if ss:
                    text = text + ' .%02d' % ss
            print(text)
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %s' % (n,))
        print('-'*70)
        print('len = %s' % (len(m),))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        # Close the message file even if parsing or printing fails.
        f.close()