blob: 64cd702cd0c6b7cef1d9e4adf215a69bfd823a3f [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long-awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
Benjamin Peterson26305a02008-06-12 22:33:06 +000076from warnings import warnpy3k
Benjamin Petersona6864e02008-07-14 17:42:17 +000077warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
78 stacklevel=2)
Benjamin Peterson26305a02008-06-12 22:33:06 +000079
Skip Montanaro0de65802001-02-15 22:15:14 +000080__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
Guido van Rossum01ca3361992-07-13 14:28:59 +000081
Guido van Rossum9ab94c11997-12-10 16:17:39 +000082_blanklines = ('\r\n', '\n') # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000083
84
class Message:
    """Represents a single RFC 2822-compliant message.

    The header block is read from a file-like object when the instance is
    created.  Afterwards the instance exposes:

    fp              -- the file object, positioned at the start of the body
    seekable        -- true if tell()/seek() on fp are believed to work
    startofheaders  -- fp.tell() before the headers were read (or None)
    startofbody     -- fp.tell() after the headers were read (or None)
    headers         -- list of raw header lines, exactly as read
    dict            -- lower-cased header name -> stripped value (the last
                       occurrence wins when a header is repeated)
    unixfrom        -- any leading Unix "From " envelope line(s)
    status          -- '' if parsing went well, else an error message
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp must support readline().  If seekable is exactly 1, fp.tell() is
        probed once and seekable is reset to 0 when the probe fails; for
        any other value no probe is made.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Push-back strategy: prefer fp.unread() when available, otherwise
        # remember the offset of each line so it can be seek()ed back to.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line; continuation lines are kept.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    # Dictionary-style alias for getheader().
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            # s[:1] (not s[0]) for consistency with the other header scans.
            if s[:1].isspace():
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the address lists of repeated headers.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is
        missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name]  # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        self._appendheaderlines(name, value)

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so the existing list object is updated in place.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it first if absent.

        If the header is missing it is appended to the raw header list and
        stored with the value `default', which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            self._appendheaderlines(name, default)
            self.dict[lowername] = default
            return default

    def _appendheaderlines(self, name, value):
        """Append "name: value" to self.headers, one entry per text line."""
        text = name + ": " + value
        self.headers.extend([line + "\n" for line in text.split("\n")])

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000463
464
465# Utility functions
466# -----------------
467
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000468# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469# XXX The inverses of the parse functions may also be useful.
470
Guido van Rossum01ca3361992-07-13 14:28:59 +0000471
def unquote(s):
    """Strip one enclosing pair of double quotes or angle brackets from s.

    For a double-quoted string the escapes \\\\ and \\" inside it are also
    undone.  Strings shorter than two characters, or without a matching
    pair of delimiters, are returned unchanged.
    """
    if len(s) <= 1:
        return s
    if s.startswith('"') and s.endswith('"'):
        inner = s[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return s[1:-1]
    return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000480
481
def quote(s):
    """Escape backslashes and double quotes in s.

    Each backslash is doubled and each double quote gets a backslash in
    front of it.  No surrounding quote characters are added.
    """
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000486
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none is
    found the result is (None, None).
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000494
495
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000496class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000497 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000498
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000499 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000500 RFC 2822 in front of you.
501
502 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000503
504 Note: this class interface is deprecated and may be removed in the future.
505 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000506 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000507
def __init__(self, field):
    """Initialize a new instance.

    `field' is an unparsed address header field, containing one or more
    addresses.  Parsing state starts at position 0 with no comments
    collected.
    """
    specials = '()<>@,:;.\"[]'
    lws = ' \t'
    cr = '\r\n'
    atomends = specials + lws + cr

    self.field = field
    self.pos = 0
    self.commentlist = []

    self.specials = specials
    self.LWS = lws
    self.CR = cr
    self.atomends = atomends
    # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
    # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
    # syntax, so allow dots in phrases.
    self.phraseends = atomends.replace('.', '')
Tim Peters0c9886d2001-01-15 01:18:21 +0000525
def gotonext(self):
    """Parse up to the start of the next address.

    Advances self.pos past linear whitespace, CR and LF characters; each
    parenthesized comment met on the way is read with getcomment() and
    appended to self.commentlist.
    """
    skippable = self.LWS + '\n\r'
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in skippable:
            self.pos += 1
            continue
        if ch != '(':
            break
        self.commentlist.append(self.getcomment())
Tim Peters0c9886d2001-01-15 01:18:21 +0000534
def getaddrlist(self):
    """Parse all addresses.

    Calls getaddress() repeatedly, collecting what it returns, and stops
    at the first empty result.  Returns a list containing all of the
    addresses gathered up to that point.
    """
    collected = []
    while True:
        batch = self.getaddress()
        if not batch:
            break
        collected.extend(batch)
    return collected
Tim Peters0c9886d2001-01-15 01:18:21 +0000546
def getaddress(self):
    """Parse the next address.

    Returns a list of (realname, mailaddr) tuples: one entry for a plain
    address, any number for a group, and an empty list when nothing
    usable was found at the current position.  A single trailing comma
    after the address is consumed.
    """
    self.commentlist = []
    self.gotonext()

    oldpos = self.pos
    # Keep a *copy* of the comments seen so far.  The phrase scan below
    # appends to self.commentlist in place, so an alias could not be used
    # to roll back to this point.
    oldcl = self.commentlist[:]
    plist = self.getphraselist()

    self.gotonext()
    returnlist = []
    fieldlen = len(self.field)

    if self.pos >= fieldlen:
        # Bad email address technically, no domain.
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]

    elif self.field[self.pos] in '.@':
        # email address is just an addrspec
        # this isn't very efficient since we start over
        self.pos = oldpos
        # Discard comments gathered during the phrase scan; getaddrspec()
        # rescans the same text and would otherwise record them twice.
        self.commentlist = oldcl
        addrspec = self.getaddrspec()
        returnlist = [(' '.join(self.commentlist), addrspec)]

    elif self.field[self.pos] == ':':
        # address is a group
        self.pos += 1
        while self.pos < fieldlen:
            self.gotonext()
            if self.pos < fieldlen and self.field[self.pos] == ';':
                self.pos += 1
                break
            returnlist = returnlist + self.getaddress()

    elif self.field[self.pos] == '<':
        # Address is a phrase then a route addr
        routeaddr = self.getrouteaddr()

        if self.commentlist:
            returnlist = [(' '.join(plist) + ' (' +
                           ' '.join(self.commentlist) + ')', routeaddr)]
        else:
            returnlist = [(' '.join(plist), routeaddr)]

    else:
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]
        elif self.field[self.pos] in self.specials:
            # Skip a stray special character so the parser keeps moving.
            self.pos += 1

    self.gotonext()
    if self.pos < fieldlen and self.field[self.pos] == ',':
        self.pos += 1
    return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000604
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000605 def getrouteaddr(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000606 """Parse a route address (Return-path value).
Tim Peters0c9886d2001-01-15 01:18:21 +0000607
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000608 This method just skips all the route stuff and returns the addrspec.
609 """
610 if self.field[self.pos] != '<':
611 return
Tim Peters0c9886d2001-01-15 01:18:21 +0000612
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000613 expectroute = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000614 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000615 self.gotonext()
Guido van Rossumf830a522001-12-20 15:54:48 +0000616 adlist = ""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000617 while self.pos < len(self.field):
618 if expectroute:
619 self.getdomain()
620 expectroute = 0
621 elif self.field[self.pos] == '>':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000622 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000623 break
624 elif self.field[self.pos] == '@':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000625 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000626 expectroute = 1
627 elif self.field[self.pos] == ':':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000628 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000629 else:
630 adlist = self.getaddrspec()
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000631 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000632 break
633 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000634
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000635 return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000636
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000637 def getaddrspec(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000638 """Parse an RFC 2822 addr-spec."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000639 aslist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000640
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000641 self.gotonext()
642 while self.pos < len(self.field):
643 if self.field[self.pos] == '.':
644 aslist.append('.')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000645 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000646 elif self.field[self.pos] == '"':
Guido van Rossumb1844871999-06-15 18:06:20 +0000647 aslist.append('"%s"' % self.getquote())
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000648 elif self.field[self.pos] in self.atomends:
649 break
650 else: aslist.append(self.getatom())
651 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000652
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000653 if self.pos >= len(self.field) or self.field[self.pos] != '@':
Guido van Rossumc80f1822000-12-15 15:37:48 +0000654 return ''.join(aslist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000655
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000656 aslist.append('@')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000657 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000658 self.gotonext()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000659 return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000660
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000661 def getdomain(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000662 """Get the complete domain name from an address."""
663 sdlist = []
664 while self.pos < len(self.field):
665 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000666 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000667 elif self.field[self.pos] == '(':
668 self.commentlist.append(self.getcomment())
669 elif self.field[self.pos] == '[':
670 sdlist.append(self.getdomainliteral())
671 elif self.field[self.pos] == '.':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000672 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000673 sdlist.append('.')
674 elif self.field[self.pos] in self.atomends:
675 break
676 else: sdlist.append(self.getatom())
Guido van Rossumc80f1822000-12-15 15:37:48 +0000677 return ''.join(sdlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000678
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000679 def getdelimited(self, beginchar, endchars, allowcomments = 1):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000680 """Parse a header fragment delimited by special characters.
Tim Peters0c9886d2001-01-15 01:18:21 +0000681
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000682 `beginchar' is the start character for the fragment. If self is not
683 looking at an instance of `beginchar' then getdelimited returns the
684 empty string.
Tim Peters0c9886d2001-01-15 01:18:21 +0000685
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000686 `endchars' is a sequence of allowable end-delimiting characters.
687 Parsing stops when one of these is encountered.
Tim Peters0c9886d2001-01-15 01:18:21 +0000688
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000689 If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
690 within the parsed fragment.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000691 """
692 if self.field[self.pos] != beginchar:
693 return ''
Tim Peters0c9886d2001-01-15 01:18:21 +0000694
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000695 slist = ['']
696 quote = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000697 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000698 while self.pos < len(self.field):
699 if quote == 1:
700 slist.append(self.field[self.pos])
701 quote = 0
702 elif self.field[self.pos] in endchars:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000703 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000704 break
705 elif allowcomments and self.field[self.pos] == '(':
706 slist.append(self.getcomment())
Barry Warsawdbcc8d92006-05-01 03:03:02 +0000707 continue # have already advanced pos from getcomment
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000708 elif self.field[self.pos] == '\\':
709 quote = 1
710 else:
711 slist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000712 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000713
Guido van Rossumc80f1822000-12-15 15:37:48 +0000714 return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000715
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000716 def getquote(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000717 """Get a quote-delimited fragment from self's field."""
718 return self.getdelimited('"', '"\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000719
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000720 def getcomment(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000721 """Get a parenthesis-delimited fragment from self's field."""
722 return self.getdelimited('(', ')\r', 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000723
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000724 def getdomainliteral(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000725 """Parse an RFC 2822 domain-literal."""
Barry Warsaw2ea2b112000-09-25 15:08:27 +0000726 return '[%s]' % self.getdelimited('[', ']\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000727
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000728 def getatom(self, atomends=None):
729 """Parse an RFC 2822 atom.
730
731 Optional atomends specifies a different set of end token delimiters
732 (the default is to use self.atomends). This is used e.g. in
733 getphraselist() since phrase endings must not include the `.' (which
734 is legal in phrases)."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000735 atomlist = ['']
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000736 if atomends is None:
737 atomends = self.atomends
Tim Peters0c9886d2001-01-15 01:18:21 +0000738
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000739 while self.pos < len(self.field):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000740 if self.field[self.pos] in atomends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000741 break
742 else: atomlist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000743 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000744
Guido van Rossumc80f1822000-12-15 15:37:48 +0000745 return ''.join(atomlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000746
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000747 def getphraselist(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000748 """Parse a sequence of RFC 2822 phrases.
Tim Peters0c9886d2001-01-15 01:18:21 +0000749
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000750 A phrase is a sequence of words, which are in turn either RFC 2822
751 atoms or quoted-strings. Phrases are canonicalized by squeezing all
752 runs of continuous whitespace into one space.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000753 """
754 plist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000755
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000756 while self.pos < len(self.field):
757 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000758 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000759 elif self.field[self.pos] == '"':
760 plist.append(self.getquote())
761 elif self.field[self.pos] == '(':
762 self.commentlist.append(self.getcomment())
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000763 elif self.field[self.pos] in self.phraseends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000764 break
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000765 else:
766 plist.append(self.getatom(self.phraseends))
Tim Peters0c9886d2001-01-15 01:18:21 +0000767
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000768 return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000769
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed entries are kept in the `addresslist' attribute.  The
    +, -, += and -= operators implement set-like union and difference
    on those entries.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field yields an empty list without parsing.
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        # Set union: everything in self, plus entries of other not in self.
        result = AddressList(None)
        mine = self.addresslist
        result.addresslist = mine[:] + [x for x in other.addresslist
                                        if x not in mine]
        return result

    def __iadd__(self, other):
        # Set union, in-place
        mine = self.addresslist
        for x in other.addresslist:
            if x not in mine:
                mine.append(x)
        return self

    def __sub__(self, other):
        # Set difference: entries of self that do not appear in other.
        result = AddressList(None)
        result.addresslist = [x for x in self.addresslist
                              if x not in other.addresslist]
        return result

    def __isub__(self, other):
        # Set difference, in-place
        mine = self.addresslist
        for x in other.addresslist:
            if x in mine:
                mine.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000819
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is `"name" <address>'; otherwise it
    is just the address.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return ''.join(['"', name, '" <', address, '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000826
827# Parse a date field
828
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000829_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
830 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000831 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000832 'august', 'september', 'october', 'november', 'december']
833_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000834
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000835# The timezone table does not include the military time zones defined
836# in RFC822, other than Z. According to RFC1123, the description in
837# RFC822 gets the signs wrong, so we can't rely on any such time
838# zones. RFC1123 recommends that numeric timezone indicators be used
839# instead of timezone names.
840
Tim Peters0c9886d2001-01-15 01:18:21 +0000841_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000842 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000843 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000844 'CST': -600, 'CDT': -500, # Central
845 'MST': -700, 'MDT': -600, # Mountain
846 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000847 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000848
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000849
850def parsedate_tz(data):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000851 """Convert a date string to a time tuple.
Tim Peters0c9886d2001-01-15 01:18:21 +0000852
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000853 Accounts for military timezones.
854 """
Barry Warsaw4a106ee2001-11-13 18:00:40 +0000855 if not data:
856 return None
Guido van Rossumc80f1822000-12-15 15:37:48 +0000857 data = data.split()
858 if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000859 # There's a dayname here. Skip it
860 del data[0]
Georg Brandl62634952007-01-22 21:10:33 +0000861 else:
862 # no space after the "weekday,"?
863 i = data[0].rfind(',')
864 if i >= 0:
865 data[0] = data[0][i+1:]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000866 if len(data) == 3: # RFC 850 date, deprecated
Guido van Rossumc80f1822000-12-15 15:37:48 +0000867 stuff = data[0].split('-')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000868 if len(stuff) == 3:
869 data = stuff + data[1:]
870 if len(data) == 4:
871 s = data[3]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000872 i = s.find('+')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000873 if i > 0:
874 data[3:] = [s[:i], s[i+1:]]
875 else:
876 data.append('') # Dummy tz
877 if len(data) < 5:
878 return None
879 data = data[:5]
880 [dd, mm, yy, tm, tz] = data
Guido van Rossumc80f1822000-12-15 15:37:48 +0000881 mm = mm.lower()
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000882 if not mm in _monthnames:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000883 dd, mm = mm, dd.lower()
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000884 if not mm in _monthnames:
885 return None
886 mm = _monthnames.index(mm)+1
Guido van Rossumb08f51b1999-04-29 12:50:36 +0000887 if mm > 12: mm = mm - 12
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000888 if dd[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000889 dd = dd[:-1]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000890 i = yy.find(':')
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000891 if i > 0:
Fred Drake13a2c272000-02-10 17:17:14 +0000892 yy, tm = tm, yy
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000893 if yy[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000894 yy = yy[:-1]
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000895 if not yy[0].isdigit():
Fred Drake13a2c272000-02-10 17:17:14 +0000896 yy, tz = tz, yy
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000897 if tm[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000898 tm = tm[:-1]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000899 tm = tm.split(':')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000900 if len(tm) == 2:
901 [thh, tmm] = tm
902 tss = '0'
Guido van Rossum99e11311998-12-23 21:58:38 +0000903 elif len(tm) == 3:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000904 [thh, tmm, tss] = tm
Guido van Rossum99e11311998-12-23 21:58:38 +0000905 else:
906 return None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000907 try:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000908 yy = int(yy)
909 dd = int(dd)
910 thh = int(thh)
911 tmm = int(tmm)
912 tss = int(tss)
913 except ValueError:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000914 return None
Guido van Rossumc80f1822000-12-15 15:37:48 +0000915 tzoffset = None
916 tz = tz.upper()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000917 if tz in _timezones:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000918 tzoffset = _timezones[tz]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000919 else:
Tim Peters0c9886d2001-01-15 01:18:21 +0000920 try:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000921 tzoffset = int(tz)
Tim Peters0c9886d2001-01-15 01:18:21 +0000922 except ValueError:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000923 pass
924 # Convert a timezone offset into seconds ; -0500 -> -18000
Guido van Rossuma73033f1998-02-19 00:28:58 +0000925 if tzoffset:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000926 if tzoffset < 0:
927 tzsign = -1
928 tzoffset = -tzoffset
929 else:
930 tzsign = 1
Guido van Rossum54e54c62001-09-04 19:14:14 +0000931 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000932 return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000933
Guido van Rossumb6775db1994-08-01 11:34:53 +0000934
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but without the trailing timezone offset: the
    first nine items are returned, or None if the string cannot be parsed.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000941
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000942
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds east of UTC, or None.  With no
    zone information the fields are interpreted as local time.
    """
    # Imported locally so the block does not depend on a new module-level
    # import.
    import calendar

    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # calendar.timegm() treats the fields as UTC directly.  Going through
    # time.mktime() and subtracting time.timezone gives wrong answers when
    # the local zone's historical offset differs from its current one.
    return calendar.timegm(data) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000951
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; when omitted the
    current time is used.  The result is always expressed in GMT.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second)
Guido van Rossum247a78a1999-04-19 18:04:38 +0000971
Guido van Rossumb6775db1994-08-01 11:34:53 +0000972
973# When used as script, run a small test program.
974# The first command line argument must be a filename containing one
975# message in RFC-822 format.
976
977if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000978 import sys, os
979 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
980 if sys.argv[1:]: file = sys.argv[1]
981 f = open(file, 'r')
982 m = Message(f)
983 print 'From:', m.getaddr('from')
984 print 'To:', m.getaddrlist('to')
985 print 'Subject:', m.getheader('subject')
986 print 'Date:', m.getheader('date')
987 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000988 tz = date[-1]
989 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000990 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000991 print 'ParsedDate:', time.asctime(date),
992 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000993 hhmm, ss = divmod(hhmmss, 60)
994 hh, mm = divmod(hhmm, 60)
995 print "%+03d%02d" % (hh, mm),
996 if ss: print ".%02d" % ss,
997 print
998 else:
999 print 'ParsedDate:', None
1000 m.rewindbody()
1001 n = 0
1002 while f.readline():
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +00001003 n += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001004 print 'Lines:', n
1005 print '-'*70
1006 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +00001007 if 'Date' in m: print 'Date =', m['Date']
1008 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001009 print 'keys =', m.keys()
1010 print 'values =', m.values()
1011 print 'items =', m.items()