blob: 4382a51b70d30ebdd4130db99cfeccc0114edc32 [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
Benjamin Peterson26305a02008-06-12 22:33:06 +000076from warnings import warnpy3k
# Executed at import time: reports (through warnings.warnpy3k) that this
# module no longer exists in Python 3.x and names the replacement package.
warnpy3k("in 3.x, rfc822 has been removed in favor of the email package")

# Names exported by ``from rfc822 import *``.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
Guido van Rossum01ca3361992-07-13 14:28:59 +000080
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC 2822-compliant message.

    Instance attributes (set by __init__() and readheaders()):

    fp              the input object the message is read from
    seekable        nonzero while tell()/seek() on fp are believed to work
    startofheaders  fp.tell() taken before the headers were read, or None
    startofbody     fp.tell() taken after the headers were read, or None
    headers         the raw header lines, in input order, uninterpreted
    dict            lowercased header name -> stripped value (last one wins)
    unixfrom        leading Unix "From " envelope line(s), '' if none
    status          '' if the headers parsed cleanly, else an error message
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is 1
        (the default) fp.tell() is tried once and seekable is reset to 0 when
        that fails; other values of `seekable' are taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid in Python 2 and Python 3 alike.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is consumed, but not
        stored in self.headers.  If a non-header line ends the headers
        (which is an error), an attempt is made to push it back onto the
        input; it is never stored in self.headers.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).  Nothing is returned.
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = True
        # Pushback strategy: prefer fp.unread(); otherwise remember the
        # offset of each line so we can seek() back to it.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom += line
                continue
            firstline = False
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            # It's not a header line; throw it back and stop here.
            if not self.dict:
                self.status = 'No headers'
            else:
                self.status = 'Non-header line where header expected'
            # Try to undo the read.
            if unread:
                unread(line)
            elif tell:
                self.fp.seek(startofline)
            else:
                self.status = self.status + '; bad seek'
            break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: the lowercased text before the first colon), or None if the
        line is not a header.  You may override this method in order to use
        Message parsing on tagged data in RFC 2822-like formats with special
        header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        This implementation never skips a line.  You may override this
        method in order to use Message parsing on tagged data in RFC
        2822-like formats that support embedded comments or free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = False
        for line in self.headers:
            if line[:n].lower() == name:
                hit = True
            elif not line[:1].isspace():
                # A different header starts here; stop collecting.
                hit = False
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = False
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = True
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line only.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = False
        for s in self.getallmatchingheaders(name):
            # s[:1] rather than s[0]: same continuation test as the other
            # header-scanning methods, and safe on an empty string.
            if s[:1].isspace():
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = True
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            # Use the same continuation test as getallmatchingheaders() so a
            # continuation line is never mistaken for a new header.
            if h[:1].isspace():
                raw.append(h)
                continue
            i = h.find(':')
            if i > 0:
                # Separate the values of repeated headers with a comma and
                # only ever append text taken from this very line.
                if raw:
                    raw.append(', ')
                raw.append(h[i+1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = False
        for line in self.headers:
            if line[:n].lower() == name:
                hit = True
            elif not line[:1].isspace():
                hit = False
            if not hit:
                kept.append(line)
        # Slice-assign so the existing list object is updated in place.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it first if missing.

        When the header is absent it is stored with the value `default',
        appended to the raw header lines, and `default' is returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        text = name + ": " + default
        for line in text.split("\n"):
            self.headers.append(line + "\n")
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lowercased) header names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined into a single string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000462
463
464# Utility functions
465# -----------------
466
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000467# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000468# XXX The inverses of the parse functions may also be useful.
469
Guido van Rossum01ca3361992-07-13 14:28:59 +0000470
def unquote(s):
    """Strip one layer of enclosing quotes or angle brackets from a string.

    A string wrapped in double quotes loses the quotes and has the escapes
    for backslash and double quote undone.  A string wrapped in <...> just
    loses the brackets.  Anything else, including strings shorter than two
    characters, is returned unchanged.
    """
    if len(s) <= 1:
        return s
    if s.startswith('"') and s.endswith('"'):
        inner = s[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return s[1:-1]
    return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000479
480
def quote(s):
    """Escape a string for use inside a quoted string.

    Backslashes are doubled and double quotes get a backslash in front.
    The enclosing quote characters themselves are not added.
    """
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000484
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000485
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; the result is
    (None, None) when no address could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000493
494
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000495class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000496 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000497
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000498 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000499 RFC 2822 in front of you.
500
501 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000502
503 Note: this class interface is deprecated and may be removed in the future.
504 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000505 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000506
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000507 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000508 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000509
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000510 `field' is an unparsed address header field, containing one or more
511 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000512 """
513 self.specials = '()<>@,:;.\"[]'
514 self.pos = 0
515 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000516 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000517 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000518 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
519 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
520 # syntax, so allow dots in phrases.
521 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000522 self.field = field
523 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000524
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000525 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000526 """Parse up to the start of the next address."""
527 while self.pos < len(self.field):
528 if self.field[self.pos] in self.LWS + '\n\r':
529 self.pos = self.pos + 1
530 elif self.field[self.pos] == '(':
531 self.commentlist.append(self.getcomment())
532 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000533
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000534 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000535 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000536
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000537 Returns a list containing all of the addresses.
538 """
Barry Warsawf1fd2822001-11-13 21:30:37 +0000539 result = []
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000540 ad = self.getaddress()
541 while ad:
542 result += ad
Barry Warsawf1fd2822001-11-13 21:30:37 +0000543 ad = self.getaddress()
Barry Warsawf1fd2822001-11-13 21:30:37 +0000544 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000545
    def getaddress(self):
        """Parse the next address.

        Starts at self.pos and advances it past whatever was consumed,
        including one trailing comma.  Returns a list of
        (name-or-comments, address) tuples: one entry for a single address,
        the concatenated entries of the members for a group, or an empty
        list when nothing usable was found.
        """
        # Comments are collected per address, so start with a fresh list.
        self.commentlist = []
        self.gotonext()

        # Remember where we started so the addrspec branch can re-parse.
        oldpos = self.pos
        # NOTE(review): this binds the same list object, it is not a copy;
        # comments that gotonext() appends in place after this point are
        # still there when it is "restored" below.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        # The character that follows the leading phrase decides which
        # address form we are looking at.
        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            # Step over the ':' and parse members until ';' or end of field.
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                # Each member is parsed by a recursive call (which also
                # resets self.commentlist).
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            # Any comments seen are appended to the name in parentheses.
            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Nothing parsed and we sit on a special character: step over
                # it so self.pos still advances.
                self.pos += 1

        # Consume the separator so the next call starts at the next address.
        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000603
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000604 def getrouteaddr(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000605 """Parse a route address (Return-path value).
Tim Peters0c9886d2001-01-15 01:18:21 +0000606
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000607 This method just skips all the route stuff and returns the addrspec.
608 """
609 if self.field[self.pos] != '<':
610 return
Tim Peters0c9886d2001-01-15 01:18:21 +0000611
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000612 expectroute = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000613 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000614 self.gotonext()
Guido van Rossumf830a522001-12-20 15:54:48 +0000615 adlist = ""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000616 while self.pos < len(self.field):
617 if expectroute:
618 self.getdomain()
619 expectroute = 0
620 elif self.field[self.pos] == '>':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000621 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000622 break
623 elif self.field[self.pos] == '@':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000624 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000625 expectroute = 1
626 elif self.field[self.pos] == ':':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000627 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000628 else:
629 adlist = self.getaddrspec()
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000630 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000631 break
632 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000633
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000634 return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000635
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000636 def getaddrspec(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000637 """Parse an RFC 2822 addr-spec."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000638 aslist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000639
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000640 self.gotonext()
641 while self.pos < len(self.field):
642 if self.field[self.pos] == '.':
643 aslist.append('.')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000644 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000645 elif self.field[self.pos] == '"':
Guido van Rossumb1844871999-06-15 18:06:20 +0000646 aslist.append('"%s"' % self.getquote())
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000647 elif self.field[self.pos] in self.atomends:
648 break
649 else: aslist.append(self.getatom())
650 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000651
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000652 if self.pos >= len(self.field) or self.field[self.pos] != '@':
Guido van Rossumc80f1822000-12-15 15:37:48 +0000653 return ''.join(aslist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000654
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000655 aslist.append('@')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000656 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000657 self.gotonext()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000658 return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000659
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000660 def getdomain(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000661 """Get the complete domain name from an address."""
662 sdlist = []
663 while self.pos < len(self.field):
664 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000665 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000666 elif self.field[self.pos] == '(':
667 self.commentlist.append(self.getcomment())
668 elif self.field[self.pos] == '[':
669 sdlist.append(self.getdomainliteral())
670 elif self.field[self.pos] == '.':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000671 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000672 sdlist.append('.')
673 elif self.field[self.pos] in self.atomends:
674 break
675 else: sdlist.append(self.getatom())
Guido van Rossumc80f1822000-12-15 15:37:48 +0000676 return ''.join(sdlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000677
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000678 def getdelimited(self, beginchar, endchars, allowcomments = 1):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000679 """Parse a header fragment delimited by special characters.
Tim Peters0c9886d2001-01-15 01:18:21 +0000680
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000681 `beginchar' is the start character for the fragment. If self is not
682 looking at an instance of `beginchar' then getdelimited returns the
683 empty string.
Tim Peters0c9886d2001-01-15 01:18:21 +0000684
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000685 `endchars' is a sequence of allowable end-delimiting characters.
686 Parsing stops when one of these is encountered.
Tim Peters0c9886d2001-01-15 01:18:21 +0000687
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000688 If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
689 within the parsed fragment.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000690 """
691 if self.field[self.pos] != beginchar:
692 return ''
Tim Peters0c9886d2001-01-15 01:18:21 +0000693
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000694 slist = ['']
695 quote = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000696 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000697 while self.pos < len(self.field):
698 if quote == 1:
699 slist.append(self.field[self.pos])
700 quote = 0
701 elif self.field[self.pos] in endchars:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000702 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000703 break
704 elif allowcomments and self.field[self.pos] == '(':
705 slist.append(self.getcomment())
Barry Warsawdbcc8d92006-05-01 03:03:02 +0000706 continue # have already advanced pos from getcomment
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000707 elif self.field[self.pos] == '\\':
708 quote = 1
709 else:
710 slist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000711 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000712
Guido van Rossumc80f1822000-12-15 15:37:48 +0000713 return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000714
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000715 def getquote(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000716 """Get a quote-delimited fragment from self's field."""
717 return self.getdelimited('"', '"\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000718
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000719 def getcomment(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000720 """Get a parenthesis-delimited fragment from self's field."""
721 return self.getdelimited('(', ')\r', 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000722
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000723 def getdomainliteral(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000724 """Parse an RFC 2822 domain-literal."""
Barry Warsaw2ea2b112000-09-25 15:08:27 +0000725 return '[%s]' % self.getdelimited('[', ']\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000726
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000727 def getatom(self, atomends=None):
728 """Parse an RFC 2822 atom.
729
730 Optional atomends specifies a different set of end token delimiters
731 (the default is to use self.atomends). This is used e.g. in
732 getphraselist() since phrase endings must not include the `.' (which
733 is legal in phrases)."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000734 atomlist = ['']
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000735 if atomends is None:
736 atomends = self.atomends
Tim Peters0c9886d2001-01-15 01:18:21 +0000737
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000738 while self.pos < len(self.field):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000739 if self.field[self.pos] in atomends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000740 break
741 else: atomlist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000742 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000743
Guido van Rossumc80f1822000-12-15 15:37:48 +0000744 return ''.join(atomlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000745
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000746 def getphraselist(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000747 """Parse a sequence of RFC 2822 phrases.
Tim Peters0c9886d2001-01-15 01:18:21 +0000748
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000749 A phrase is a sequence of words, which are in turn either RFC 2822
750 atoms or quoted-strings. Phrases are canonicalized by squeezing all
751 runs of continuous whitespace into one space.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000752 """
753 plist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000754
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000755 while self.pos < len(self.field):
756 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000757 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000758 elif self.field[self.pos] == '"':
759 plist.append(self.getquote())
760 elif self.field[self.pos] == '(':
761 self.commentlist.append(self.getcomment())
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000762 elif self.field[self.pos] in self.phraseends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000763 break
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000764 else:
765 plist.append(self.getatom(self.phraseends))
Tim Peters0c9886d2001-01-15 01:18:21 +0000766
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000767 return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000768
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed entries are kept in self.addresslist.  The arithmetic
    operators treat two AddressLists as sets of those entries.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        parsed = []
        if field:
            parsed = self.getaddrlist()
        self.addresslist = parsed

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        # Set union: everything in self, then the entries of other that
        # self does not already contain.
        union = AddressList(None)
        union.addresslist = self.addresslist + [
            entry for entry in other.addresslist
            if entry not in self.addresslist]
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference: the entries of self that other does not contain.
        difference = AddressList(None)
        difference.addresslist = [
            entry for entry in self.addresslist
            if entry not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            try:
                self.addresslist.remove(entry)
            except ValueError:
                # entry was not in self; nothing to remove
                pass
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000818
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is `"name" <address>'; otherwise it is
    the bare address.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return ''.join(['"', name, '" <', address, '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000825
826# Parse a date field
827
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000828_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
829 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000830 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000831 'august', 'september', 'october', 'november', 'december']
832_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000833
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000834# The timezone table does not include the military time zones defined
835# in RFC822, other than Z. According to RFC1123, the description in
836# RFC822 gets the signs wrong, so we can't rely on any such time
837# zones. RFC1123 recommends that numeric timezone indicators be used
838# instead of timezone names.
839
Tim Peters0c9886d2001-01-15 01:18:21 +0000840_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000841 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000842 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000843 'CST': -600, 'CDT': -500, # Central
844 'MST': -700, 'MDT': -600, # Mountain
845 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000846 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000847
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000848
def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The result is (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset), where tzoffset is the zone's offset from UTC in seconds, or
    None when the zone is missing or not recognized.  Zone names are looked
    up in _timezones (which, of the military zones, only knows `Z');
    anything else is tried as a numeric +/-HHMM offset.

    Returns None when `data' is empty, contains only whitespace, or cannot
    be parsed as a date.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the preceding field ("12:00:00-0500").
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone so int() sees it.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Perhaps the month comes before the day.
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    # Full month names occupy slots 13..24 of _monthnames.
    if mm > 12: mm = mm - 12
    # endswith()/slicing rather than indexing: a field can be empty here
    # (e.g. from "-jan-99"), and must be rejected below, not crash.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are in the opposite order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are in the opposite order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone: deliberately leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000932
Guido van Rossumb6775db1994-08-01 11:34:53 +0000933
def parsedate(data):
    """Convert a time string to a time tuple.

    This is the result of parsedate_tz() with the trailing timezone offset
    dropped, or None when the string cannot be parsed.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        parsed = parsed[:9]
    return parsed
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000940
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000941
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With an offset the
    result does not depend on the local timezone; without one the fields
    are interpreted as local time.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Treat the fields as UTC and then undo the zone offset.  Going
        # through time.mktime() and time.timezone instead would tie the
        # result to the local zone's rules and to mktime()'s platform range.
        import calendar
        # float() keeps the return type the same as the mktime() branch.
        return float(calendar.timegm(data[:6])) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000950
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; the current time
    is used when it is None.  The result is always expressed in GMT.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = \
        time.gmtime(timeval)[:7]
    daynames = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    monthnames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[weekday], day, monthnames[month - 1],
        year, hour, minute, second)
Guido van Rossum247a78a1999-04-19 18:04:38 +0000970
Guido van Rossumb6775db1994-08-01 11:34:53 +0000971
972# When used as script, run a small test program.
973# The first command line argument must be a filename containing one
974# message in RFC-822 format.
975
976if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000977 import sys, os
978 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
979 if sys.argv[1:]: file = sys.argv[1]
980 f = open(file, 'r')
981 m = Message(f)
982 print 'From:', m.getaddr('from')
983 print 'To:', m.getaddrlist('to')
984 print 'Subject:', m.getheader('subject')
985 print 'Date:', m.getheader('date')
986 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000987 tz = date[-1]
988 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000989 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000990 print 'ParsedDate:', time.asctime(date),
991 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000992 hhmm, ss = divmod(hhmmss, 60)
993 hh, mm = divmod(hhmm, 60)
994 print "%+03d%02d" % (hh, mm),
995 if ss: print ".%02d" % ss,
996 print
997 else:
998 print 'ParsedDate:', None
999 m.rewindbody()
1000 n = 0
1001 while f.readline():
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +00001002 n += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001003 print 'Lines:', n
1004 print '-'*70
1005 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +00001006 if 'Date' in m: print 'Date =', m['Date']
1007 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001008 print 'keys =', m.keys()
1009 print 'values =', m.values()
1010 print 'items =', m.items()