blob: c4b7d373b0cff096ad28bcb1cd9a8247f3a64f3b [file] [log] [blame]
"""RFC 2822 message manipulation.

Note: This is only a very rough sketch of a full RFC-822 parser; in particular
the tokenizing of addresses does not adhere to all the quoting rules.

Note: RFC 2822 is a long-awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.

    RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
    RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)

Directions for use:

To create a Message object: first open a file, e.g.:

    fp = open(file, 'r')

You can use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen().  Then pass the open file object to the Message()
constructor:

    m = Message(fp)

This class can work with any input object that supports a readline method.  If
the input object has seek and tell capability, the rewindbody method will
work; also illegal lines will be pushed back onto the input stream.  If the
input object lacks seek but has an `unread' method that can push back a line
of input, Message will use that to push back illegal lines.  Thus this class
can be used to parse messages coming from a buffered stream.

The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.

To get the text of a particular header there are several methods:

    str = m.getheader(name)
    str = m.getrawheader(name)

where name is the name of the header, e.g. 'Subject'.  The difference is that
getheader() strips the leading and trailing whitespace, while getrawheader()
doesn't.  Both functions retain embedded whitespace (including newlines)
exactly as they are specified in the header, and leave the case of the text
unchanged.

For addresses and address lists there are functions

    realname, mailaddress = m.getaddr(name)
    list = m.getaddrlist(name)

where the latter returns a list of (realname, mailaddr) tuples.

There is also a method

    time = m.getdate(name)

which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
# Names exported by a star-import of this module.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() treats as the blank line
# terminating the headers: a bare newline, or CRLF as read from a socket.
_blanklines = ('\r\n', '\n') # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000079
80
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  They are kept in two forms:

    self.headers -- list of the raw header lines, in file order, each
                    including its line terminator.
    self.dict    -- mapping of lower-cased header name to stripped value;
                    when a header occurs several times the last one wins.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp must support readline().  If seekable is exactly 1, fp.tell() is
        probed once and seekable is reset to 0 when the probe fails; any
        other value is taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        # Remember where the headers start, if the stream can tell us.
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        # Remember where the body starts so rewindbody() can return here.
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3
            # (the old "raise IOError, msg" form is a SyntaxError on 3.x).
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick the push-back strategy: prefer fp.unread() when it exists,
        # otherwise remember each line's offset with tell() and seek back.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() stopped working: give up on seeking entirely.
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        This implementation returns the lower-cased text before the first
        colon, or None when there is no colon or the line starts with one.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  This implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header starts here; stop collecting.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                # Already inside the match: anything that is not a
                # continuation line ends it.
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line; continuation lines stay intact.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[:1].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                # A new occurrence starts; flush the previous one first.
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate the addresses of successive headers with a comma
                # so they parse as one list.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        # A multi-line value becomes one raw header line per text line.
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        lst = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(i)
        # Delete from the end so the remaining indices stay valid.
        for i in reversed(lst):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it if it is missing.

        If the header is absent it is appended to the raw header list and
        stored with the value `default', which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined into a single string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000455
456
457# Utility functions
458# -----------------
459
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000460# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000461# XXX The inverses of the parse functions may also be useful.
462
Guido van Rossum01ca3361992-07-13 14:28:59 +0000463
def unquote(s):
    """Strip one layer of enclosing quotes or angle brackets from s.

    A string wrapped in double quotes loses the quotes and has the escape
    sequences backslash-backslash and backslash-quote undone.  A string
    wrapped in <...> just loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(s) <= 1:
        return s
    ends = (s[:1], s[-1:])
    if ends == ('"', '"'):
        inner = s[1:-1]
        # Collapse escaped backslashes first, then escaped quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if ends == ('<', '>'):
        return s[1:-1]
    return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000472
473
def quote(s):
    """Escape s for use inside a double-quoted string.

    Every backslash is doubled and every double quote gets a backslash in
    front of it.  No surrounding quote characters are added.
    """
    # Backslashes must be doubled first so the ones added for the quotes
    # are not doubled again.
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
Guido van Rossumb6775db1994-08-01 11:34:53 +0000477
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000478
def parseaddr(address):
    """Parse an address string into a (realname, mailaddr) tuple.

    Only the first address found by AddressList is returned; when none is
    found the result is (None, None).
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000486
487
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000488class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000489 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000490
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000491 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000492 RFC 2822 in front of you.
493
494 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000495
496 Note: this class interface is deprecated and may be removed in the future.
497 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000498 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000499
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000500 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000501 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000502
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000503 `field' is an unparsed address header field, containing one or more
504 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000505 """
506 self.specials = '()<>@,:;.\"[]'
507 self.pos = 0
508 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000509 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000510 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000511 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
512 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
513 # syntax, so allow dots in phrases.
514 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000515 self.field = field
516 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000517
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000518 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000519 """Parse up to the start of the next address."""
520 while self.pos < len(self.field):
521 if self.field[self.pos] in self.LWS + '\n\r':
522 self.pos = self.pos + 1
523 elif self.field[self.pos] == '(':
524 self.commentlist.append(self.getcomment())
525 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000526
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000527 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000528 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000529
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000530 Returns a list containing all of the addresses.
531 """
Barry Warsawf1fd2822001-11-13 21:30:37 +0000532 result = []
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000533 ad = self.getaddress()
534 while ad:
535 result += ad
Barry Warsawf1fd2822001-11-13 21:30:37 +0000536 ad = self.getaddress()
Barry Warsawf1fd2822001-11-13 21:30:37 +0000537 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000538
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000539 def getaddress(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000540 """Parse the next address."""
541 self.commentlist = []
542 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000543
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000544 oldpos = self.pos
545 oldcl = self.commentlist
546 plist = self.getphraselist()
Tim Peters0c9886d2001-01-15 01:18:21 +0000547
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000548 self.gotonext()
549 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000550
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000551 if self.pos >= len(self.field):
552 # Bad email address technically, no domain.
553 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000554 returnlist = [(' '.join(self.commentlist), plist[0])]
Tim Peters0c9886d2001-01-15 01:18:21 +0000555
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000556 elif self.field[self.pos] in '.@':
557 # email address is just an addrspec
558 # this isn't very efficient since we start over
559 self.pos = oldpos
560 self.commentlist = oldcl
561 addrspec = self.getaddrspec()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000562 returnlist = [(' '.join(self.commentlist), addrspec)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000563
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000564 elif self.field[self.pos] == ':':
565 # address is a group
566 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000567
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000568 fieldlen = len(self.field)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000569 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000570 while self.pos < len(self.field):
571 self.gotonext()
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000572 if self.pos < fieldlen and self.field[self.pos] == ';':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000573 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000574 break
575 returnlist = returnlist + self.getaddress()
Tim Peters0c9886d2001-01-15 01:18:21 +0000576
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000577 elif self.field[self.pos] == '<':
578 # Address is a phrase then a route addr
579 routeaddr = self.getrouteaddr()
Tim Peters0c9886d2001-01-15 01:18:21 +0000580
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000581 if self.commentlist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000582 returnlist = [(' '.join(plist) + ' (' + \
583 ' '.join(self.commentlist) + ')', routeaddr)]
584 else: returnlist = [(' '.join(plist), routeaddr)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000585
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000586 else:
587 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000588 returnlist = [(' '.join(self.commentlist), plist[0])]
Barry Warsaw8a578431999-01-14 19:59:58 +0000589 elif self.field[self.pos] in self.specials:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000590 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000591
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000592 self.gotonext()
593 if self.pos < len(self.field) and self.field[self.pos] == ',':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000594 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000595 return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000596
def getrouteaddr(self):
    """Parse a route address (Return-path value).

    This method just skips all the route stuff and returns the addrspec.

    Returns None when the parser is not looking at a `<' (including when
    it is already at the end of the field), and the empty string when the
    closing `>' or the end of the field is reached before any addr-spec.
    """
    if self.pos >= len(self.field) or self.field[self.pos] != '<':
        return None

    expectroute = False
    self.pos += 1
    self.gotonext()
    adlist = ''
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if expectroute:
            # The previous character was `@': consume and discard the
            # route domain that follows it.
            self.getdomain()
            expectroute = False
        elif ch == '>':
            self.pos += 1
            break
        elif ch == '@':
            self.pos += 1
            expectroute = True
        elif ch == ':':
            # Separator between the route and the addr-spec.
            self.pos += 1
        else:
            adlist = self.getaddrspec()
            # Step over the character that ended the addr-spec.
            self.pos += 1
            break
        self.gotonext()

    return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000628
def getaddrspec(self):
    """Parse an RFC 2822 addr-spec.

    Collects the local part (atoms, dots and quoted strings) and, when an
    `@' follows, appends the domain returned by getdomain().  Returns the
    assembled string; without an `@' only the local part is returned.
    """
    aslist = []

    self.gotonext()
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch == '.':
            aslist.append('.')
            self.pos += 1
        elif ch == '"':
            # Keep the quotes so the local part stays a quoted-string.
            aslist.append('"%s"' % self.getquote())
        elif ch in self.atomends:
            break
        else:
            aslist.append(self.getatom())
        self.gotonext()

    if self.pos >= len(self.field) or self.field[self.pos] != '@':
        return ''.join(aslist)

    aslist.append('@')
    self.pos += 1
    self.gotonext()
    return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000652
def getdomain(self):
    """Get the complete domain name from an address.

    Whitespace is skipped, comments are moved to self.commentlist, and
    domain literals, dots and atoms are concatenated.  Parsing stops at
    the first other character found in self.atomends or at the end of the
    field.  Returns the domain as a string (possibly empty).
    """
    sdlist = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            sdlist.append(self.getdomainliteral())
        elif ch == '.':
            self.pos += 1
            sdlist.append('.')
        elif ch in self.atomends:
            break
        else:
            sdlist.append(self.getatom())
    return ''.join(sdlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000670
def getdelimited(self, beginchar, endchars, allowcomments=1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.  If self is not
    looking at an instance of `beginchar' (or is already at the end of
    the field) then getdelimited returns the empty string.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered.

    If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
    within the parsed fragment.

    Returns the fragment's content without the delimiters; a backslash
    quotes the character that follows it and is itself dropped.
    """
    if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
        return ''

    slist = []
    quote = False
    self.pos += 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if quote:
            # Character after a backslash is taken literally.
            slist.append(ch)
            quote = False
        elif ch in endchars:
            self.pos += 1
            break
        elif allowcomments and ch == '(':
            slist.append(self.getcomment())
            continue        # have already advanced pos from getcomment
        elif ch == '\\':
            quote = True
        else:
            slist.append(ch)
        self.pos += 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000707
def getquote(self):
    """Get a quote-delimited fragment from self's field.

    The fragment ends at a closing double quote or a carriage return;
    embedded comments are not recognized.
    """
    return self.getdelimited('"', '"\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000711
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    The fragment ends at a closing parenthesis or a carriage return;
    nested comments are allowed.
    """
    return self.getdelimited('(', ')\r', 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000715
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal.

    Returns the literal's content wrapped in square brackets.
    """
    return '[%s]' % self.getdelimited('[', ']\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000719
def getatom(self, atomends=None):
    """Parse an RFC 2822 atom.

    Optional atomends specifies a different set of end token delimiters
    (the default is to use self.atomends).  This is used e.g. in
    getphraselist() since phrase endings must not include the `.' (which
    is legal in phrases).

    Returns the characters from the current position up to, but not
    including, the first delimiter (or the end of the field) and leaves
    self.pos on that delimiter.
    """
    if atomends is None:
        atomends = self.atomends

    start = self.pos
    while self.pos < len(self.field):
        if self.field[self.pos] in atomends:
            break
        self.pos += 1

    return self.field[start:self.pos]
Tim Peters0c9886d2001-01-15 01:18:21 +0000738
def getphraselist(self):
    """Parse a sequence of RFC 2822 phrases.

    A phrase is a sequence of words, which are in turn either RFC 2822
    atoms or quoted-strings.  Phrases are canonicalized by squeezing all
    runs of continuous whitespace into one space.

    Returns the list of words found; comments met along the way are
    appended to self.commentlist instead.
    """
    plist = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '"':
            plist.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.phraseends:
            break
        else:
            plist.append(self.getatom(self.phraseends))

    return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000761
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed addresses are kept in self.addresslist.  The arithmetic
    operators implement set-like union (+, +=) and difference (-, -=)
    while preserving the order of the left operand.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None or '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000811
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    Returns '"name" <address>' when the name is non-empty, otherwise just
    the address.
    """
    if pair[0]:
        return '"' + pair[0] + '" <' + pair[1] + '>'
    else:
        return pair[1]
Guido van Rossumb6775db1994-08-01 11:34:53 +0000818
819# Parse a date field
820
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000821_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
822 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000823 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000824 'august', 'september', 'october', 'november', 'december']
825_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000826
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000827# The timezone table does not include the military time zones defined
828# in RFC822, other than Z. According to RFC1123, the description in
829# RFC822 gets the signs wrong, so we can't rely on any such time
830# zones. RFC1123 recommends that numeric timezone indicators be used
831# instead of timezone names.
832
Tim Peters0c9886d2001-01-15 01:18:21 +0000833_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000834 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000835 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000836 'CST': -600, 'CDT': -500, # Central
837 'MST': -700, 'MDT': -600, # Mountain
838 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000839 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000840
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000841
def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset), or None when the string cannot be parsed.  tzoffset is the
    zone's offset in seconds, taken from a numeric indicator such as
    -0500 or from a name listed in _timezones; it is None when no usable
    zone is present.  The military zones of RFC 822 are not recognized,
    other than Z.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. 10:00:00+0100 or
        # 10:00:00-0500; split it off, keeping the sign.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month comes before the day.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Matched a full month name in the second half of the table.
        mm -= 12
    # endswith()/slicing rather than [-1]/[0]: tokens can be empty after
    # the RFC 850 split, and that should yield None, not IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Time and year are in the other order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Zone and year are in the other order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000925
Guido van Rossumb6775db1994-08-01 11:34:53 +0000926
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of the parsedate_tz() result (dropping
    the timezone offset), or None when the string cannot be parsed.
    """
    t = parsedate_tz(data)
    if t is None:
        return None
    return t[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000933
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000934
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the date carried
    no zone information; in that case the fields are interpreted as local
    time.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Interpret the fields as UTC directly.  Going through
        # time.mktime() and subtracting time.timezone gives wrong answers
        # when local daylight saving time applies to the date.
        import calendar
        return calendar.timegm(data) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000943
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.

    `timeval' is a timestamp in seconds since the epoch; the current time
    is used when it is None.
    """
    if timeval is None:
        timeval = time.time()
    timeval = time.gmtime(timeval)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
            timeval[2],
            ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
            timeval[0], timeval[3], timeval[4], timeval[5])
Guido van Rossum247a78a1999-04-19 18:04:38 +0000963
Guido van Rossumb6775db1994-08-01 11:34:53 +0000964
# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.

if __name__ == '__main__':
    import os
    import sys
    if sys.argv[1:]:
        file = sys.argv[1]
    else:
        # Only consult $HOME when no file was given on the command line.
        file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    with open(file, 'r') as f:
        m = Message(f)
        print('From:', m.getaddr('from'))
        print('To:', m.getaddrlist('to'))
        print('Subject:', m.getheader('subject'))
        print('Date:', m.getheader('date'))
        # NOTE(review): assumes getdate_tz() returns None for a missing or
        # unparsable Date header -- confirm against Message.getdate_tz.
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            date = time.localtime(mktime_tz(date))
            print('ParsedDate:', time.asctime(date), end=' ')
            if tz is not None:
                # Split the magnitude and print the sign separately:
                # floor division on a negative offset would turn -0530
                # into -0630.
                sign = '-' if tz < 0 else '+'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                print("%s%02d%02d" % (sign, hh, mm), end=' ')
                if ss:
                    print(".%02d" % ss, end=' ')
            print()
        else:
            print('ParsedDate:', None)
        m.rewindbody()
        n = 0
        while f.readline():
            n += 1
        print('Lines:', n)
        print('-'*70)
        print('len =', len(m))
        if 'Date' in m: print('Date =', m['Date'])
        if 'X-Nonsense' in m: pass
        print('keys =', m.keys())
        print('values =', m.values())
        print('items =', m.items())