blob: 9a5b8a23f2c7a1e5129c64126cc2f6dbb51ac508 [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
6Note: RFC 2822 is a long awaited update to RFC 822. This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000033The optional `seekable' argument is provided as a workaround for certain stdio
34libraries in which tell() discards buffered data before discovering that the
35lseek() system call doesn't work. For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
Ezio Melotti1e87da12011-10-19 10:39:35 +030037an unseekable object such as a file object created from a socket object. If
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000038it is 1 on entry -- which it is by default -- the tell() method of the open
39file object is called once; if this raises an exception, seekable is reset to
400. For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
Benjamin Peterson26305a02008-06-12 22:33:06 +000076from warnings import warnpy3k
# Emit a py3k deprecation warning at import time; stacklevel=2 attributes the
# warning to the module that imports rfc822 rather than to this file.
warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
         stacklevel=2)

# Names exported by ``from rfc822 import *``.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() accepts as the header terminator.
_blanklines = ('\r\n', '\n')    # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000083
84
class Message:
    """Represents a single RFC 2822-compliant message.

    The header section is read from a file-like object when the instance is
    created.  Afterwards the headers are available in two forms:

    self.headers -- the raw header lines, in order, exactly as read
    self.dict    -- a mapping of lower-cased header name to stripped value
                    (for repeated headers only the *last* value is kept)

    Other attributes set by the constructor and readheaders() are self.fp,
    self.seekable, self.startofheaders, self.startofbody, self.unixfrom and
    self.status.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        `fp' is the input object; it must provide readline().  `seekable'
        says whether tell()/seek() may be used on it.  When it is exactly 1
        (the default), tell() is tried once and seekable is reset to 0 if
        that raises AttributeError or IOError.  Other nonzero values skip
        this probe.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        # readheaders() may itself have cleared self.seekable.
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError("unseekable file") when the input is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = headers = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick a push-back strategy: prefer fp.unread(), else tell()/seek().
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                # Remember where this line starts so it can be pushed back.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                headers.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                headers.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            elif headerseen is not None:
                # An empty header name. These aren't allowed in HTTP, but it's
                # probably a benign mistake. Don't add the header, just keep
                # going.
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        This implementation returns the lower-cased text before the first
        colon (possibly the empty string), or None if there is no colon.
        """
        i = line.find(':')
        if i > -1:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  This implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; stop collecting.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "name:" from the first line; continuation lines stay intact.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if getaddrlist() finds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate the values of successive headers with a comma.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    # Append only when a value was found, so an unbound or
                    # stale value can never be added.
                    raw.append(h[i+1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so the remaining indexes stay valid.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of a header, adding it first if it is missing.

        If the header is absent it is appended to self.headers and stored in
        self.dict with the value `default', which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined into a single string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000468
469
470# Utility functions
471# -----------------
472
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000473# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000474# XXX The inverses of the parse functions may also be useful.
475
Guido van Rossum01ca3361992-07-13 14:28:59 +0000476
def unquote(s):
    """Strip one layer of surrounding quotes or angle brackets from `s'.

    A string wrapped in double quotes loses the quotes and has the escapes
    \\\\ and \\" undone.  A string wrapped in <...> loses the brackets.  Any
    other string (including one shorter than two characters) is returned
    unchanged.
    """
    if len(s) <= 1:
        return s
    first, last = s[0], s[-1]
    if first == '"' and last == '"':
        inner = s[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if first == '<' and last == '>':
        return s[1:-1]
    return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485
486
def quote(s):
    """Escape `s' for use inside a quoted string.

    Every backslash and every double quote is preceded by a backslash.  No
    surrounding quote characters are added.
    """
    escaped = s.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000490
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000491
def parseaddr(address):
    """Return the first address parsed from `address'.

    The result is the first entry of AddressList(address).addresslist, or
    (None, None) when that list is empty.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000499
500
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000501class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000503
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000504 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000505 RFC 2822 in front of you.
506
507 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000508
509 Note: this class interface is deprecated and may be removed in the future.
510 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000511 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000512
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000513 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000514 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000515
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000516 `field' is an unparsed address header field, containing one or more
517 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000518 """
519 self.specials = '()<>@,:;.\"[]'
520 self.pos = 0
521 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000522 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000523 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000524 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
525 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
526 # syntax, so allow dots in phrases.
527 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000528 self.field = field
529 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000530
def gotonext(self):
    """Advance self.pos past whitespace, line breaks and comments.

    Each comment encountered is read with getcomment() and appended to
    self.commentlist.  Scanning stops at the first other character or at
    the end of the field.
    """
    while True:
        if self.pos >= len(self.field):
            return
        ch = self.field[self.pos]
        if ch in self.LWS + '\n\r':
            self.pos += 1
            continue
        if ch != '(':
            return
        self.commentlist.append(self.getcomment())
Tim Peters0c9886d2001-01-15 01:18:21 +0000539
def getaddrlist(self):
    """Parse addresses until getaddress() yields an empty result.

    Returns one flat list holding the entries of every non-empty result
    that getaddress() produced before the first empty one.
    """
    collected = []
    while True:
        chunk = self.getaddress()
        if not chunk:
            break
        collected.extend(chunk)
    return collected
Tim Peters0c9886d2001-01-15 01:18:21 +0000551
def getaddress(self):
    """Parse the next address.

    Starting at self.pos, reads a phrase list and then decides what kind
    of address follows from the next character.  Returns a list of
    (name, address) tuples: empty when nothing usable was found, one
    entry for a single address, and possibly several for a group.  A
    trailing ',' separator, if present, is consumed before returning.
    """
    self.commentlist = []
    self.gotonext()

    # Remember where the address starts so the addrspec case can re-parse.
    oldpos = self.pos
    # NOTE(review): this stores a reference to the same list object, not a
    # copy, so "restoring" it below keeps any comments collected in between
    # -- confirm that is intended.
    oldcl = self.commentlist
    plist = self.getphraselist()

    self.gotonext()
    returnlist = []

    if self.pos >= len(self.field):
        # Bad email address technically, no domain.
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]

    elif self.field[self.pos] in '.@':
        # email address is just an addrspec
        # this isn't very efficient since we start over
        self.pos = oldpos
        self.commentlist = oldcl
        addrspec = self.getaddrspec()
        returnlist = [(' '.join(self.commentlist), addrspec)]

    elif self.field[self.pos] == ':':
        # address is a group
        returnlist = []

        fieldlen = len(self.field)
        self.pos += 1
        # Collect member addresses (recursively) until the closing ';'.
        while self.pos < len(self.field):
            self.gotonext()
            if self.pos < fieldlen and self.field[self.pos] == ';':
                self.pos += 1
                break
            returnlist = returnlist + self.getaddress()

    elif self.field[self.pos] == '<':
        # Address is a phrase then a route addr
        routeaddr = self.getrouteaddr()

        # The display name is the phrase, plus any comments in parentheses.
        if self.commentlist:
            returnlist = [(' '.join(plist) + ' (' + \
                     ' '.join(self.commentlist) + ')', routeaddr)]
        else: returnlist = [(' '.join(plist), routeaddr)]

    else:
        if plist:
            returnlist = [(' '.join(self.commentlist), plist[0])]
        elif self.field[self.pos] in self.specials:
            # Step over a stray special character so parsing can advance.
            self.pos += 1

    self.gotonext()
    # Consume the comma that separates this address from the next one.
    if self.pos < len(self.field) and self.field[self.pos] == ',':
        self.pos += 1
    return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000609
def getrouteaddr(self):
    """Parse a route address (Return-path value).

    Expects self.pos to be on '<'; returns None without moving otherwise.
    Any "@domain" route prefixes and ':' separators are consumed and
    discarded; only the addr-spec is returned ('' if there is none).
    """
    if self.field[self.pos] != '<':
        return None

    self.pos += 1
    self.gotonext()
    addrspec = ""
    in_route = False
    while self.pos < len(self.field):
        if in_route:
            # The domain following a route '@' is parsed and thrown away.
            self.getdomain()
            in_route = False
        else:
            ch = self.field[self.pos]
            if ch == '>':
                self.pos += 1
                break
            if ch == '@':
                self.pos += 1
                in_route = True
            elif ch == ':':
                self.pos += 1
            else:
                addrspec = self.getaddrspec()
                # Step over the character that ended the addr-spec.
                self.pos += 1
                break
        self.gotonext()

    return addrspec
Tim Peters0c9886d2001-01-15 01:18:21 +0000641
def getaddrspec(self):
    """Parse an RFC 2822 addr-spec.

    Reads the local part (atoms, dots and quoted strings) and, when an
    '@' follows, the domain.  Returns the pieces joined into one string;
    quoted strings are returned with their double quotes restored.
    """
    local_parts = []

    self.gotonext()
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch == '.':
            local_parts.append('.')
            self.pos += 1
        elif ch == '"':
            local_parts.append('"%s"' % self.getquote())
        elif ch in self.atomends:
            break
        else:
            local_parts.append(self.getatom())
        self.gotonext()

    local = ''.join(local_parts)
    if self.pos < len(self.field) and self.field[self.pos] == '@':
        self.pos += 1
        self.gotonext()
        return local + '@' + self.getdomain()

    # No domain present: the local part is all there is.
    return local
Tim Peters0c9886d2001-01-15 01:18:21 +0000665
def getdomain(self):
    """Get the complete domain name from an address.

    Whitespace is skipped, comments are moved to self.commentlist, and
    atoms, dots and domain literals are concatenated until another
    atom-ending character or the end of the field is reached.
    """
    pieces = []
    field = self.field
    end = len(field)
    while self.pos < end:
        ch = field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            pieces.append(self.getdomainliteral())
        elif ch == '.':
            pieces.append('.')
            self.pos += 1
        elif ch in self.atomends:
            break
        else:
            pieces.append(self.getatom())
    return ''.join(pieces)
Tim Peters0c9886d2001-01-15 01:18:21 +0000683
def getdelimited(self, beginchar, endchars, allowcomments=1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.  If self is not
    looking at an instance of `beginchar' (including when self.pos is
    already at the end of the field) then getdelimited returns the empty
    string and leaves self.pos alone.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered; the end character is
    consumed but not returned.

    If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
    within the parsed fragment; their text is included without the
    parentheses.

    A backslash quotes the following character: the backslash is dropped
    and the character is taken literally, even if it is an end character.
    """
    # Bounds check first: indexing past the end used to raise IndexError
    # instead of returning '' as documented.
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = ['']
    quote = 0
    self.pos += 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote == 1:
            slist.append(ch)
            quote = 0
        elif ch in endchars:
            self.pos += 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            continue        # have already advanced pos from getcomment
        elif ch == '\\':
            quote = 1
        else:
            slist.append(ch)
        self.pos += 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000720
def getquote(self):
    """Get a quote-delimited fragment from self's field.

    The fragment ends at the closing double quote or at a carriage
    return; comments are not recognised inside it.
    """
    terminators = '"\r'
    return self.getdelimited('"', terminators, 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000724
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    The fragment ends at ')' or at a carriage return; nested comments
    are allowed.
    """
    terminators = ')\r'
    return self.getdelimited('(', terminators, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000728
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal.

    Returns the literal's content wrapped in square brackets again.
    """
    content = self.getdelimited('[', ']\r', 0)
    return '[%s]' % content
Tim Peters0c9886d2001-01-15 01:18:21 +0000732
def getatom(self, atomends=None):
    """Parse an RFC 2822 atom.

    Consumes characters from self.pos up to, but not including, the first
    end-of-atom character (or the end of the field) and returns them.

    Optional atomends specifies a different set of end token delimiters
    (the default is to use self.atomends).  This is used e.g. in
    getphraselist() since phrase endings must not include the `.' (which
    is legal in phrases)."""
    if atomends is None:
        stops = self.atomends
    else:
        stops = atomends

    field = self.field
    start = self.pos
    end = start
    while end < len(field) and field[end] not in stops:
        end += 1

    self.pos = end
    return field[start:end]
Tim Peters0c9886d2001-01-15 01:18:21 +0000751
def getphraselist(self):
    """Parse a sequence of RFC 2822 phrases.

    A phrase is a sequence of words, which are in turn either RFC 2822
    atoms or quoted-strings.  Returns the list of words; whitespace
    between them is dropped and comments are moved to self.commentlist.
    Parsing stops at the first phrase-ending character.
    """
    words = []
    field = self.field
    end = len(field)

    while self.pos < end:
        ch = field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.phraseends:
            break
        else:
            words.append(self.getatom(self.phraseends))

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000774
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed addresses are kept in self.addresslist, in the order they
    were found.  Instances support len(), str(), indexing, and the set-like
    operators +, -, += and -= (union and difference by equality of the
    list entries).
    """

    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union.  Membership is checked against the list being built
        # (not just self) so that duplicates inside `other' are added only
        # once, exactly as __iadd__ does.
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in newaddr.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place.  The surviving entries are computed
        # first and then stored with a slice assignment: this keeps the
        # same list object, is safe when `other' is `self' (no removal
        # while iterating), and drops every occurrence of a matching
        # entry, like __sub__.
        self.addresslist[:] = [x for x in self.addresslist
                               if x not in other.addresslist]
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000824
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is just the address.  Backslashes and double quotes in the name are
    backslash-escaped so that the quoted string stays well formed and can
    be parsed back to the same name.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    # Escape the backslash first so the escapes added for '"' are not
    # themselves doubled.
    quoted = name.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + quoted + '" <' + address + '>'
Guido van Rossumb6775db1994-08-01 11:34:53 +0000831
# Parse a date field
833
# Month names, abbreviated then spelled out.  The full names occupy
# positions 13-24, so parsedate_tz() folds an index above 12 back by 12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns None when the string cannot be parsed, otherwise the 10-tuple

        (year, month, day, hour, minute, second, 0, 1, 0, tzoffset)

    The year is returned exactly as written (two-digit years are not
    expanded).  Items 6-8 are fixed filler values.  tzoffset is the zone
    offset in seconds with the sign of the header (-0500 -> -18000), or
    None when no zone is given or it is not recognized.  Zone names are
    looked up in _timezones; of the military zones only Z is known.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to inspect.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("08:49:37+0100" or
        # "08:49:37-0500"); split it off, keeping the sign.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month comes first ("Nov 6 ...").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # The tokens below can be empty (e.g. "-Nov-94" or a lone ","), so
    # use endswith() and slicing rather than indexing, which would raise
    # IndexError; an empty token is then rejected by int() further down.
    if dd.endswith(','):
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Year and time are in the opposite order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are in the opposite order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000938
Guido van Rossumb6775db1994-08-01 11:34:53 +0000939
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of the parsedate_tz() result (i.e.
    without the zone offset), or None if the string cannot be parsed.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000946
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000947
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With an offset the
    result does not depend on the local timezone of the machine; without
    one, the first eight items are interpreted as local time.
    """
    import calendar

    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # calendar.timegm() treats the fields as UTC, so only the message's
    # own offset has to be removed.  Going through time.mktime() and
    # correcting with time.timezone gave wrong answers whenever the local
    # zone's UTC offset on that date differed from time.timezone.
    # float() keeps the return type the same as in the branch above.
    return float(calendar.timegm(data) - data[9])
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000956
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; when omitted the
    current time is used.  The result is always expressed in GMT.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    utc = time.gmtime(timeval)
    weekday = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[utc.tm_wday]
    month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[utc.tm_mon - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        weekday, utc.tm_mday, month, utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
Guido van Rossum247a78a1999-04-19 18:04:38 +0000976
Guido van Rossumb6775db1994-08-01 11:34:53 +0000977
# When used as a script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.
981
982if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000983 import sys, os
984 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
985 if sys.argv[1:]: file = sys.argv[1]
986 f = open(file, 'r')
987 m = Message(f)
988 print 'From:', m.getaddr('from')
989 print 'To:', m.getaddrlist('to')
990 print 'Subject:', m.getheader('subject')
991 print 'Date:', m.getheader('date')
992 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000993 tz = date[-1]
994 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000995 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000996 print 'ParsedDate:', time.asctime(date),
997 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000998 hhmm, ss = divmod(hhmmss, 60)
999 hh, mm = divmod(hhmm, 60)
1000 print "%+03d%02d" % (hh, mm),
1001 if ss: print ".%02d" % ss,
1002 print
1003 else:
1004 print 'ParsedDate:', None
1005 m.rewindbody()
1006 n = 0
1007 while f.readline():
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +00001008 n += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001009 print 'Lines:', n
1010 print '-'*70
1011 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +00001012 if 'Date' in m: print 'Date =', m['Date']
1013 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001014 print 'keys =', m.keys()
1015 print 'values =', m.values()
1016 print 'items =', m.items()