blob: d6d5e4712972e83b14c0b069a60e9baf904d4c1c [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
6Note: RFC 2822 is a long awaited update to RFC 822. This module should
7conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8effort at RFC 2822 updates have been made, but a thorough audit has not been
9performed. Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000033The optional `seekable' argument is provided as a workaround for certain stdio
34libraries in which tell() discards buffered data before discovering that the
35lseek() system call doesn't work. For maximum portability, you should set the
36seekable argument to zero to prevent that initial \code{tell} when passing in
37an unseekable object such as a a file object created from a socket object. If
38it is 1 on entry -- which it is by default -- the tell() method of the open
39file object is called once; if this raises an exception, seekable is reset to
400. For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
Skip Montanaro0de65802001-02-15 22:15:14 +000076__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
Guido van Rossum01ca3361992-07-13 14:28:59 +000077
Guido van Rossum9ab94c11997-12-10 16:17:39 +000078_blanklines = ('\r\n', '\n') # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000079
80
Guido van Rossum01ca3361992-07-13 14:28:59 +000081class Message:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000082 """Represents a single RFC 2822-compliant message."""
Tim Peters0c9886d2001-01-15 01:18:21 +000083
Guido van Rossum9ab94c11997-12-10 16:17:39 +000084 def __init__(self, fp, seekable = 1):
85 """Initialize the class instance and read the headers."""
Guido van Rossumc7bb8571998-06-10 21:31:01 +000086 if seekable == 1:
87 # Exercise tell() to make sure it works
88 # (and then assume seek() works, too)
89 try:
90 fp.tell()
unknown67bbd7a2001-07-04 07:07:33 +000091 except (AttributeError, IOError):
Guido van Rossumc7bb8571998-06-10 21:31:01 +000092 seekable = 0
Guido van Rossum9ab94c11997-12-10 16:17:39 +000093 self.fp = fp
94 self.seekable = seekable
95 self.startofheaders = None
96 self.startofbody = None
97 #
98 if self.seekable:
99 try:
100 self.startofheaders = self.fp.tell()
101 except IOError:
102 self.seekable = 0
103 #
104 self.readheaders()
105 #
106 if self.seekable:
107 try:
108 self.startofbody = self.fp.tell()
109 except IOError:
110 self.seekable = 0
Tim Peters0c9886d2001-01-15 01:18:21 +0000111
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000112 def rewindbody(self):
113 """Rewind the file to the start of the body (if seekable)."""
114 if not self.seekable:
115 raise IOError, "unseekable file"
116 self.fp.seek(self.startofbody)
Tim Peters0c9886d2001-01-15 01:18:21 +0000117
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000118 def readheaders(self):
119 """Read header lines.
Tim Peters0c9886d2001-01-15 01:18:21 +0000120
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000121 Read header lines up to the entirely blank line that terminates them.
122 The (normally blank) line that ends the headers is skipped, but not
123 included in the returned list. If a non-header line ends the headers,
124 (which is an error), an attempt is made to backspace over it; it is
125 never included in the returned list.
Tim Peters0c9886d2001-01-15 01:18:21 +0000126
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000127 The variable self.status is set to the empty string if all went well,
128 otherwise it is an error message. The variable self.headers is a
129 completely uninterpreted list of lines contained in the header (so
130 printing them will reproduce the header exactly as it appears in the
131 file).
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000132 """
133 self.dict = {}
134 self.unixfrom = ''
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000135 self.headers = lst = []
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000136 self.status = ''
137 headerseen = ""
138 firstline = 1
Guido van Rossum052969a1998-07-21 14:24:04 +0000139 startofline = unread = tell = None
140 if hasattr(self.fp, 'unread'):
141 unread = self.fp.unread
142 elif self.seekable:
143 tell = self.fp.tell
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000144 while 1:
Guido van Rossum052969a1998-07-21 14:24:04 +0000145 if tell:
Guido van Rossuma66eed62000-11-09 18:05:24 +0000146 try:
147 startofline = tell()
148 except IOError:
149 startofline = tell = None
150 self.seekable = 0
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000151 line = self.fp.readline()
152 if not line:
153 self.status = 'EOF in headers'
154 break
155 # Skip unix From name time lines
Guido van Rossumc80f1822000-12-15 15:37:48 +0000156 if firstline and line.startswith('From '):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000157 self.unixfrom = self.unixfrom + line
158 continue
159 firstline = 0
Guido van Rossume894fc01998-06-11 13:58:40 +0000160 if headerseen and line[0] in ' \t':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000161 # It's a continuation line.
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000162 lst.append(line)
Guido van Rossumc80f1822000-12-15 15:37:48 +0000163 x = (self.dict[headerseen] + "\n " + line.strip())
164 self.dict[headerseen] = x.strip()
Guido van Rossume894fc01998-06-11 13:58:40 +0000165 continue
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000166 elif self.iscomment(line):
Guido van Rossume894fc01998-06-11 13:58:40 +0000167 # It's a comment. Ignore it.
168 continue
169 elif self.islast(line):
170 # Note! No pushback here! The delimiter line gets eaten.
171 break
172 headerseen = self.isheader(line)
173 if headerseen:
174 # It's a legal header line, save it.
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000175 lst.append(line)
Guido van Rossumc80f1822000-12-15 15:37:48 +0000176 self.dict[headerseen] = line[len(headerseen)+1:].strip()
Guido van Rossume894fc01998-06-11 13:58:40 +0000177 continue
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000178 else:
Guido van Rossume894fc01998-06-11 13:58:40 +0000179 # It's not a header line; throw it back and stop here.
180 if not self.dict:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000181 self.status = 'No headers'
182 else:
Guido van Rossume894fc01998-06-11 13:58:40 +0000183 self.status = 'Non-header line where header expected'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000184 # Try to undo the read.
Guido van Rossum052969a1998-07-21 14:24:04 +0000185 if unread:
186 unread(line)
187 elif tell:
188 self.fp.seek(startofline)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000189 else:
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000190 self.status = self.status + '; bad seek'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000191 break
Guido van Rossume894fc01998-06-11 13:58:40 +0000192
193 def isheader(self, line):
194 """Determine whether a given line is a legal header.
195
196 This method should return the header name, suitably canonicalized.
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000197 You may override this method in order to use Message parsing on tagged
198 data in RFC 2822-like formats with special header formats.
Guido van Rossume894fc01998-06-11 13:58:40 +0000199 """
Guido van Rossumc80f1822000-12-15 15:37:48 +0000200 i = line.find(':')
Guido van Rossume894fc01998-06-11 13:58:40 +0000201 if i > 0:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000202 return line[:i].lower()
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000203 return None
Tim Peters0c9886d2001-01-15 01:18:21 +0000204
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000205 def islast(self, line):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000206 """Determine whether a line is a legal end of RFC 2822 headers.
Tim Peters0c9886d2001-01-15 01:18:21 +0000207
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000208 You may override this method if your application wants to bend the
209 rules, e.g. to strip trailing whitespace, or to recognize MH template
210 separators ('--------'). For convenience (e.g. for code reading from
211 sockets) a line consisting of \r\n also matches.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000212 """
213 return line in _blanklines
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000214
215 def iscomment(self, line):
216 """Determine whether a line should be skipped entirely.
217
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000218 You may override this method in order to use Message parsing on tagged
219 data in RFC 2822-like formats that support embedded comments or
220 free-text data.
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000221 """
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000222 return False
Tim Peters0c9886d2001-01-15 01:18:21 +0000223
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000224 def getallmatchingheaders(self, name):
225 """Find all header lines matching a given header name.
Tim Peters0c9886d2001-01-15 01:18:21 +0000226
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000227 Look through the list of headers and find all lines matching a given
228 header name (and their continuation lines). A list of the lines is
229 returned, without interpretation. If the header does not occur, an
230 empty list is returned. If the header occurs multiple times, all
231 occurrences are returned. Case is not important in the header name.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000232 """
Guido van Rossumc80f1822000-12-15 15:37:48 +0000233 name = name.lower() + ':'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000234 n = len(name)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000235 lst = []
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000236 hit = 0
237 for line in self.headers:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000238 if line[:n].lower() == name:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000239 hit = 1
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000240 elif not line[:1].isspace():
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000241 hit = 0
242 if hit:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000243 lst.append(line)
244 return lst
Tim Peters0c9886d2001-01-15 01:18:21 +0000245
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000246 def getfirstmatchingheader(self, name):
247 """Get the first header line matching name.
Tim Peters0c9886d2001-01-15 01:18:21 +0000248
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000249 This is similar to getallmatchingheaders, but it returns only the
250 first matching header (and its continuation lines).
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000251 """
Guido van Rossumc80f1822000-12-15 15:37:48 +0000252 name = name.lower() + ':'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000253 n = len(name)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000254 lst = []
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000255 hit = 0
256 for line in self.headers:
257 if hit:
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000258 if not line[:1].isspace():
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000259 break
Guido van Rossumc80f1822000-12-15 15:37:48 +0000260 elif line[:n].lower() == name:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000261 hit = 1
262 if hit:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000263 lst.append(line)
264 return lst
Tim Peters0c9886d2001-01-15 01:18:21 +0000265
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000266 def getrawheader(self, name):
267 """A higher-level interface to getfirstmatchingheader().
Tim Peters0c9886d2001-01-15 01:18:21 +0000268
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000269 Return a string containing the literal text of the header but with the
270 keyword stripped. All leading, trailing and embedded whitespace is
271 kept in the string, however. Return None if the header does not
272 occur.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000273 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000274
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000275 lst = self.getfirstmatchingheader(name)
276 if not lst:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000277 return None
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000278 lst[0] = lst[0][len(name) + 1:]
279 return ''.join(lst)
Tim Peters0c9886d2001-01-15 01:18:21 +0000280
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000281 def getheader(self, name, default=None):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000282 """Get the header value for a name.
Tim Peters0c9886d2001-01-15 01:18:21 +0000283
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000284 This is the normal interface: it returns a stripped version of the
285 header value for a given header name, or None if it doesn't exist.
286 This uses the dictionary version which finds the *last* such header.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000287 """
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000288 return self.dict.get(name.lower(), default)
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000289 get = getheader
Fred Drakeddf22c41999-04-28 21:17:38 +0000290
291 def getheaders(self, name):
292 """Get all values for a header.
293
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000294 This returns a list of values for headers given more than once; each
295 value in the result list is stripped in the same way as the result of
296 getheader(). If the header is not given, return an empty list.
Fred Drakeddf22c41999-04-28 21:17:38 +0000297 """
298 result = []
299 current = ''
300 have_header = 0
301 for s in self.getallmatchingheaders(name):
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000302 if s[0].isspace():
Fred Drakeddf22c41999-04-28 21:17:38 +0000303 if current:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000304 current = "%s\n %s" % (current, s.strip())
Fred Drakeddf22c41999-04-28 21:17:38 +0000305 else:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000306 current = s.strip()
Fred Drakeddf22c41999-04-28 21:17:38 +0000307 else:
308 if have_header:
309 result.append(current)
Guido van Rossumc80f1822000-12-15 15:37:48 +0000310 current = s[s.find(":") + 1:].strip()
Fred Drakeddf22c41999-04-28 21:17:38 +0000311 have_header = 1
312 if have_header:
313 result.append(current)
Fred Drakecbfa5cb1999-06-14 15:40:23 +0000314 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000315
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000316 def getaddr(self, name):
317 """Get a single address from a header, as a tuple.
Tim Peters0c9886d2001-01-15 01:18:21 +0000318
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000319 An example return value:
320 ('Guido van Rossum', 'guido@cwi.nl')
321 """
322 # New, by Ben Escoto
323 alist = self.getaddrlist(name)
324 if alist:
325 return alist[0]
326 else:
327 return (None, None)
Tim Peters0c9886d2001-01-15 01:18:21 +0000328
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000329 def getaddrlist(self, name):
330 """Get a list of addresses from a header.
Barry Warsaw8a578431999-01-14 19:59:58 +0000331
332 Retrieves a list of addresses from a header, where each address is a
333 tuple as returned by getaddr(). Scans all named headers, so it works
334 properly with multiple To: or Cc: headers for example.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000335 """
Barry Warsaw8a578431999-01-14 19:59:58 +0000336 raw = []
337 for h in self.getallmatchingheaders(name):
Fred Drake13a2c272000-02-10 17:17:14 +0000338 if h[0] in ' \t':
339 raw.append(h)
340 else:
341 if raw:
342 raw.append(', ')
Guido van Rossumc80f1822000-12-15 15:37:48 +0000343 i = h.find(':')
Barry Warsaw8a578431999-01-14 19:59:58 +0000344 if i > 0:
345 addr = h[i+1:]
346 raw.append(addr)
Guido van Rossumc80f1822000-12-15 15:37:48 +0000347 alladdrs = ''.join(raw)
Barry Warsaw56cdf112002-04-12 20:55:31 +0000348 a = AddressList(alladdrs)
Barry Warsaw0a8d4d52002-05-21 19:46:13 +0000349 return a.addresslist
Tim Peters0c9886d2001-01-15 01:18:21 +0000350
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000351 def getdate(self, name):
352 """Retrieve a date field from a header.
Tim Peters0c9886d2001-01-15 01:18:21 +0000353
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000354 Retrieves a date field from the named header, returning a tuple
355 compatible with time.mktime().
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000356 """
357 try:
358 data = self[name]
359 except KeyError:
360 return None
361 return parsedate(data)
Tim Peters0c9886d2001-01-15 01:18:21 +0000362
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000363 def getdate_tz(self, name):
364 """Retrieve a date field from a header as a 10-tuple.
Tim Peters0c9886d2001-01-15 01:18:21 +0000365
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000366 The first 9 elements make up a tuple compatible with time.mktime(),
367 and the 10th is the offset of the poster's time zone from GMT/UTC.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000368 """
369 try:
370 data = self[name]
371 except KeyError:
372 return None
373 return parsedate_tz(data)
Tim Peters0c9886d2001-01-15 01:18:21 +0000374
375
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000376 # Access as a dictionary (only finds *last* header of each type):
Tim Peters0c9886d2001-01-15 01:18:21 +0000377
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000378 def __len__(self):
379 """Get the number of headers in a message."""
380 return len(self.dict)
Tim Peters0c9886d2001-01-15 01:18:21 +0000381
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000382 def __getitem__(self, name):
383 """Get a specific header, as from a dictionary."""
Guido van Rossumc80f1822000-12-15 15:37:48 +0000384 return self.dict[name.lower()]
Guido van Rossume894fc01998-06-11 13:58:40 +0000385
386 def __setitem__(self, name, value):
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000387 """Set the value of a header.
388
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000389 Note: This is not a perfect inversion of __getitem__, because any
390 changed headers get stuck at the end of the raw-headers list rather
391 than where the altered header was.
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000392 """
Guido van Rossume894fc01998-06-11 13:58:40 +0000393 del self[name] # Won't fail if it doesn't exist
Guido van Rossumc80f1822000-12-15 15:37:48 +0000394 self.dict[name.lower()] = value
Guido van Rossume894fc01998-06-11 13:58:40 +0000395 text = name + ": " + value
Raymond Hettinger508e81e2005-02-08 15:39:11 +0000396 for line in text.split("\n"):
397 self.headers.append(line + "\n")
Tim Peters0c9886d2001-01-15 01:18:21 +0000398
Guido van Rossum75d92c11998-04-02 21:33:20 +0000399 def __delitem__(self, name):
400 """Delete all occurrences of a specific header, if it is present."""
Guido van Rossumc80f1822000-12-15 15:37:48 +0000401 name = name.lower()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000402 if not name in self.dict:
Guido van Rossumf3c5f5c1999-09-15 22:15:23 +0000403 return
404 del self.dict[name]
405 name = name + ':'
Guido van Rossum75d92c11998-04-02 21:33:20 +0000406 n = len(name)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000407 lst = []
Guido van Rossum75d92c11998-04-02 21:33:20 +0000408 hit = 0
409 for i in range(len(self.headers)):
410 line = self.headers[i]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000411 if line[:n].lower() == name:
Guido van Rossum75d92c11998-04-02 21:33:20 +0000412 hit = 1
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000413 elif not line[:1].isspace():
Guido van Rossum75d92c11998-04-02 21:33:20 +0000414 hit = 0
415 if hit:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000416 lst.append(i)
417 for i in reversed(lst):
Guido van Rossum75d92c11998-04-02 21:33:20 +0000418 del self.headers[i]
419
Fred Drake233226e2001-05-22 19:36:50 +0000420 def setdefault(self, name, default=""):
Fred Drake02959292001-05-22 14:58:10 +0000421 lowername = name.lower()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000422 if lowername in self.dict:
Fred Drake02959292001-05-22 14:58:10 +0000423 return self.dict[lowername]
424 else:
Fred Drake233226e2001-05-22 19:36:50 +0000425 text = name + ": " + default
Raymond Hettinger508e81e2005-02-08 15:39:11 +0000426 for line in text.split("\n"):
427 self.headers.append(line + "\n")
Fred Drake233226e2001-05-22 19:36:50 +0000428 self.dict[lowername] = default
Fred Drake02959292001-05-22 14:58:10 +0000429 return default
430
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000431 def has_key(self, name):
432 """Determine whether a message contains the named header."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000433 return name.lower() in self.dict
434
435 def __contains__(self, name):
436 """Determine whether a message contains the named header."""
Tim Petersc411dba2002-07-16 21:35:23 +0000437 return name.lower() in self.dict
Tim Peters0c9886d2001-01-15 01:18:21 +0000438
Raymond Hettingerce96d8b2004-09-22 17:17:32 +0000439 def __iter__(self):
440 return iter(self.dict)
441
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000442 def keys(self):
443 """Get all of a message's header field names."""
444 return self.dict.keys()
Tim Peters0c9886d2001-01-15 01:18:21 +0000445
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000446 def values(self):
447 """Get all of a message's header field values."""
448 return self.dict.values()
Tim Peters0c9886d2001-01-15 01:18:21 +0000449
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000450 def items(self):
451 """Get all of a message's headers.
Tim Peters0c9886d2001-01-15 01:18:21 +0000452
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000453 Returns a list of name, value tuples.
454 """
455 return self.dict.items()
Guido van Rossum01ca3361992-07-13 14:28:59 +0000456
Guido van Rossumc7bb8571998-06-10 21:31:01 +0000457 def __str__(self):
Neil Schemenauer767126d2003-11-11 19:39:17 +0000458 return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000459
460
461# Utility functions
462# -----------------
463
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000464# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000465# XXX The inverses of the parse functions may also be useful.
466
Guido van Rossum01ca3361992-07-13 14:28:59 +0000467
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000468def unquote(s):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000469 """Remove quotes from a string."""
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000470 if len(s) > 1:
471 if s.startswith('"') and s.endswith('"'):
472 return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
473 if s.startswith('<') and s.endswith('>'):
474 return s[1:-1]
475 return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000476
477
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000478def quote(s):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000479 """Add quotes around a string."""
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000480 return s.replace('\\', '\\\\').replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000481
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000482
Guido van Rossumb6775db1994-08-01 11:34:53 +0000483def parseaddr(address):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000484 """Parse an address into a (realname, mailaddr) tuple."""
Barry Warsaw56cdf112002-04-12 20:55:31 +0000485 a = AddressList(address)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000486 lst = a.addresslist
487 if not lst:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000488 return (None, None)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000489 return lst[0]
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000490
491
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000492class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000493 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000494
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000495 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000496 RFC 2822 in front of you.
497
498 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000499
500 Note: this class interface is deprecated and may be removed in the future.
501 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000503
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000504 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000505 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000506
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000507 `field' is an unparsed address header field, containing one or more
508 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000509 """
510 self.specials = '()<>@,:;.\"[]'
511 self.pos = 0
512 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000513 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000514 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000515 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
516 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
517 # syntax, so allow dots in phrases.
518 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000519 self.field = field
520 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000521
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000522 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000523 """Parse up to the start of the next address."""
524 while self.pos < len(self.field):
525 if self.field[self.pos] in self.LWS + '\n\r':
526 self.pos = self.pos + 1
527 elif self.field[self.pos] == '(':
528 self.commentlist.append(self.getcomment())
529 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000530
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000531 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000532 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000533
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000534 Returns a list containing all of the addresses.
535 """
Barry Warsawf1fd2822001-11-13 21:30:37 +0000536 result = []
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000537 ad = self.getaddress()
538 while ad:
539 result += ad
Barry Warsawf1fd2822001-11-13 21:30:37 +0000540 ad = self.getaddress()
Barry Warsawf1fd2822001-11-13 21:30:37 +0000541 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000542
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000543 def getaddress(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000544 """Parse the next address."""
545 self.commentlist = []
546 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000547
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000548 oldpos = self.pos
549 oldcl = self.commentlist
550 plist = self.getphraselist()
Tim Peters0c9886d2001-01-15 01:18:21 +0000551
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000552 self.gotonext()
553 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000554
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000555 if self.pos >= len(self.field):
556 # Bad email address technically, no domain.
557 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000558 returnlist = [(' '.join(self.commentlist), plist[0])]
Tim Peters0c9886d2001-01-15 01:18:21 +0000559
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000560 elif self.field[self.pos] in '.@':
561 # email address is just an addrspec
562 # this isn't very efficient since we start over
563 self.pos = oldpos
564 self.commentlist = oldcl
565 addrspec = self.getaddrspec()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000566 returnlist = [(' '.join(self.commentlist), addrspec)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000567
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000568 elif self.field[self.pos] == ':':
569 # address is a group
570 returnlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000571
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000572 fieldlen = len(self.field)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000573 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000574 while self.pos < len(self.field):
575 self.gotonext()
Barry Warsaw96e9bf41999-07-12 18:37:02 +0000576 if self.pos < fieldlen and self.field[self.pos] == ';':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000577 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000578 break
579 returnlist = returnlist + self.getaddress()
Tim Peters0c9886d2001-01-15 01:18:21 +0000580
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000581 elif self.field[self.pos] == '<':
582 # Address is a phrase then a route addr
583 routeaddr = self.getrouteaddr()
Tim Peters0c9886d2001-01-15 01:18:21 +0000584
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000585 if self.commentlist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000586 returnlist = [(' '.join(plist) + ' (' + \
587 ' '.join(self.commentlist) + ')', routeaddr)]
588 else: returnlist = [(' '.join(plist), routeaddr)]
Tim Peters0c9886d2001-01-15 01:18:21 +0000589
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000590 else:
591 if plist:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000592 returnlist = [(' '.join(self.commentlist), plist[0])]
Barry Warsaw8a578431999-01-14 19:59:58 +0000593 elif self.field[self.pos] in self.specials:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000594 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000595
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000596 self.gotonext()
597 if self.pos < len(self.field) and self.field[self.pos] == ',':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000598 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000599 return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000600
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000601 def getrouteaddr(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000602 """Parse a route address (Return-path value).
Tim Peters0c9886d2001-01-15 01:18:21 +0000603
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000604 This method just skips all the route stuff and returns the addrspec.
605 """
606 if self.field[self.pos] != '<':
607 return
Tim Peters0c9886d2001-01-15 01:18:21 +0000608
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000609 expectroute = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000610 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000611 self.gotonext()
Guido van Rossumf830a522001-12-20 15:54:48 +0000612 adlist = ""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000613 while self.pos < len(self.field):
614 if expectroute:
615 self.getdomain()
616 expectroute = 0
617 elif self.field[self.pos] == '>':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000618 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000619 break
620 elif self.field[self.pos] == '@':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000621 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000622 expectroute = 1
623 elif self.field[self.pos] == ':':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000624 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000625 else:
626 adlist = self.getaddrspec()
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000627 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000628 break
629 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000630
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000631 return adlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000632
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000633 def getaddrspec(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000634 """Parse an RFC 2822 addr-spec."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000635 aslist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000636
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000637 self.gotonext()
638 while self.pos < len(self.field):
639 if self.field[self.pos] == '.':
640 aslist.append('.')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000641 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000642 elif self.field[self.pos] == '"':
Guido van Rossumb1844871999-06-15 18:06:20 +0000643 aslist.append('"%s"' % self.getquote())
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000644 elif self.field[self.pos] in self.atomends:
645 break
646 else: aslist.append(self.getatom())
647 self.gotonext()
Tim Peters0c9886d2001-01-15 01:18:21 +0000648
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000649 if self.pos >= len(self.field) or self.field[self.pos] != '@':
Guido van Rossumc80f1822000-12-15 15:37:48 +0000650 return ''.join(aslist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000651
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000652 aslist.append('@')
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000653 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000654 self.gotonext()
Guido van Rossumc80f1822000-12-15 15:37:48 +0000655 return ''.join(aslist) + self.getdomain()
Tim Peters0c9886d2001-01-15 01:18:21 +0000656
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000657 def getdomain(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000658 """Get the complete domain name from an address."""
659 sdlist = []
660 while self.pos < len(self.field):
661 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000662 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000663 elif self.field[self.pos] == '(':
664 self.commentlist.append(self.getcomment())
665 elif self.field[self.pos] == '[':
666 sdlist.append(self.getdomainliteral())
667 elif self.field[self.pos] == '.':
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000668 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000669 sdlist.append('.')
670 elif self.field[self.pos] in self.atomends:
671 break
672 else: sdlist.append(self.getatom())
Guido van Rossumc80f1822000-12-15 15:37:48 +0000673 return ''.join(sdlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000674
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000675 def getdelimited(self, beginchar, endchars, allowcomments = 1):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000676 """Parse a header fragment delimited by special characters.
Tim Peters0c9886d2001-01-15 01:18:21 +0000677
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000678 `beginchar' is the start character for the fragment. If self is not
679 looking at an instance of `beginchar' then getdelimited returns the
680 empty string.
Tim Peters0c9886d2001-01-15 01:18:21 +0000681
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000682 `endchars' is a sequence of allowable end-delimiting characters.
683 Parsing stops when one of these is encountered.
Tim Peters0c9886d2001-01-15 01:18:21 +0000684
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000685 If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
686 within the parsed fragment.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000687 """
688 if self.field[self.pos] != beginchar:
689 return ''
Tim Peters0c9886d2001-01-15 01:18:21 +0000690
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000691 slist = ['']
692 quote = 0
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000693 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000694 while self.pos < len(self.field):
695 if quote == 1:
696 slist.append(self.field[self.pos])
697 quote = 0
698 elif self.field[self.pos] in endchars:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000699 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000700 break
701 elif allowcomments and self.field[self.pos] == '(':
702 slist.append(self.getcomment())
Barry Warsawdbcc8d92006-05-01 03:03:02 +0000703 continue # have already advanced pos from getcomment
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000704 elif self.field[self.pos] == '\\':
705 quote = 1
706 else:
707 slist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000708 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000709
Guido van Rossumc80f1822000-12-15 15:37:48 +0000710 return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000711
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000712 def getquote(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000713 """Get a quote-delimited fragment from self's field."""
714 return self.getdelimited('"', '"\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000715
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000716 def getcomment(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000717 """Get a parenthesis-delimited fragment from self's field."""
718 return self.getdelimited('(', ')\r', 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000719
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000720 def getdomainliteral(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000721 """Parse an RFC 2822 domain-literal."""
Barry Warsaw2ea2b112000-09-25 15:08:27 +0000722 return '[%s]' % self.getdelimited('[', ']\r', 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000723
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000724 def getatom(self, atomends=None):
725 """Parse an RFC 2822 atom.
726
727 Optional atomends specifies a different set of end token delimiters
728 (the default is to use self.atomends). This is used e.g. in
729 getphraselist() since phrase endings must not include the `.' (which
730 is legal in phrases)."""
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000731 atomlist = ['']
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000732 if atomends is None:
733 atomends = self.atomends
Tim Peters0c9886d2001-01-15 01:18:21 +0000734
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000735 while self.pos < len(self.field):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000736 if self.field[self.pos] in atomends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000737 break
738 else: atomlist.append(self.field[self.pos])
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000739 self.pos += 1
Tim Peters0c9886d2001-01-15 01:18:21 +0000740
Guido van Rossumc80f1822000-12-15 15:37:48 +0000741 return ''.join(atomlist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000742
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000743 def getphraselist(self):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000744 """Parse a sequence of RFC 2822 phrases.
Tim Peters0c9886d2001-01-15 01:18:21 +0000745
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000746 A phrase is a sequence of words, which are in turn either RFC 2822
747 atoms or quoted-strings. Phrases are canonicalized by squeezing all
748 runs of continuous whitespace into one space.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000749 """
750 plist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000751
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000752 while self.pos < len(self.field):
753 if self.field[self.pos] in self.LWS:
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000754 self.pos += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000755 elif self.field[self.pos] == '"':
756 plist.append(self.getquote())
757 elif self.field[self.pos] == '(':
758 self.commentlist.append(self.getcomment())
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000759 elif self.field[self.pos] in self.phraseends:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000760 break
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000761 else:
762 plist.append(self.getatom(self.phraseends))
Tim Peters0c9886d2001-01-15 01:18:21 +0000763
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000764 return plist
Guido van Rossumb6775db1994-08-01 11:34:53 +0000765
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000766class AddressList(AddrlistClass):
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000767 """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000768 def __init__(self, field):
769 AddrlistClass.__init__(self, field)
770 if field:
771 self.addresslist = self.getaddrlist()
772 else:
773 self.addresslist = []
774
775 def __len__(self):
776 return len(self.addresslist)
777
778 def __str__(self):
Guido van Rossumc80f1822000-12-15 15:37:48 +0000779 return ", ".join(map(dump_address_pair, self.addresslist))
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000780
781 def __add__(self, other):
782 # Set union
783 newaddr = AddressList(None)
784 newaddr.addresslist = self.addresslist[:]
785 for x in other.addresslist:
786 if not x in self.addresslist:
787 newaddr.addresslist.append(x)
788 return newaddr
789
Thomas Wouters104a7bc2000-08-24 20:14:10 +0000790 def __iadd__(self, other):
791 # Set union, in-place
792 for x in other.addresslist:
793 if not x in self.addresslist:
794 self.addresslist.append(x)
795 return self
796
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000797 def __sub__(self, other):
798 # Set difference
799 newaddr = AddressList(None)
800 for x in self.addresslist:
801 if not x in other.addresslist:
802 newaddr.addresslist.append(x)
803 return newaddr
804
Thomas Wouters104a7bc2000-08-24 20:14:10 +0000805 def __isub__(self, other):
806 # Set difference, in-place
807 for x in other.addresslist:
808 if x in self.addresslist:
809 self.addresslist.remove(x)
810 return self
811
Guido van Rossum81d10b41998-06-16 22:29:03 +0000812 def __getitem__(self, index):
813 # Make indexing, slices, and 'in' work
Guido van Rossuma07934e1999-09-03 13:23:49 +0000814 return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000815
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000816def dump_address_pair(pair):
817 """Dump a (name, address) pair in a canonicalized form."""
818 if pair[0]:
819 return '"' + pair[0] + '" <' + pair[1] + '>'
820 else:
821 return pair[1]
Guido van Rossumb6775db1994-08-01 11:34:53 +0000822
823# Parse a date field
824
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000825_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
826 'aug', 'sep', 'oct', 'nov', 'dec',
Fred Drake13a2c272000-02-10 17:17:14 +0000827 'january', 'february', 'march', 'april', 'may', 'june', 'july',
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000828 'august', 'september', 'october', 'november', 'december']
829_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
Guido van Rossumb6775db1994-08-01 11:34:53 +0000830
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000831# The timezone table does not include the military time zones defined
832# in RFC822, other than Z. According to RFC1123, the description in
833# RFC822 gets the signs wrong, so we can't rely on any such time
834# zones. RFC1123 recommends that numeric timezone indicators be used
835# instead of timezone names.
836
Tim Peters0c9886d2001-01-15 01:18:21 +0000837_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
Guido van Rossum67133e21998-05-18 16:09:10 +0000838 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000839 'EST': -500, 'EDT': -400, # Eastern
Guido van Rossum67133e21998-05-18 16:09:10 +0000840 'CST': -600, 'CDT': -500, # Central
841 'MST': -700, 'MDT': -600, # Mountain
842 'PST': -800, 'PDT': -700 # Pacific
Tim Peters0c9886d2001-01-15 01:18:21 +0000843 }
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000844
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000845
846def parsedate_tz(data):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000847 """Convert a date string to a time tuple.
Tim Peters0c9886d2001-01-15 01:18:21 +0000848
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000849 Accounts for military timezones.
850 """
Barry Warsaw4a106ee2001-11-13 18:00:40 +0000851 if not data:
852 return None
Guido van Rossumc80f1822000-12-15 15:37:48 +0000853 data = data.split()
854 if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000855 # There's a dayname here. Skip it
856 del data[0]
857 if len(data) == 3: # RFC 850 date, deprecated
Guido van Rossumc80f1822000-12-15 15:37:48 +0000858 stuff = data[0].split('-')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000859 if len(stuff) == 3:
860 data = stuff + data[1:]
861 if len(data) == 4:
862 s = data[3]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000863 i = s.find('+')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000864 if i > 0:
865 data[3:] = [s[:i], s[i+1:]]
866 else:
867 data.append('') # Dummy tz
868 if len(data) < 5:
869 return None
870 data = data[:5]
871 [dd, mm, yy, tm, tz] = data
Guido van Rossumc80f1822000-12-15 15:37:48 +0000872 mm = mm.lower()
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000873 if not mm in _monthnames:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000874 dd, mm = mm, dd.lower()
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000875 if not mm in _monthnames:
876 return None
877 mm = _monthnames.index(mm)+1
Guido van Rossumb08f51b1999-04-29 12:50:36 +0000878 if mm > 12: mm = mm - 12
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000879 if dd[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000880 dd = dd[:-1]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000881 i = yy.find(':')
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000882 if i > 0:
Fred Drake13a2c272000-02-10 17:17:14 +0000883 yy, tm = tm, yy
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000884 if yy[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000885 yy = yy[:-1]
Guido van Rossum352ca8c2001-01-02 20:36:32 +0000886 if not yy[0].isdigit():
Fred Drake13a2c272000-02-10 17:17:14 +0000887 yy, tz = tz, yy
Guido van Rossumdb01ee01998-12-23 22:22:10 +0000888 if tm[-1] == ',':
Fred Drake13a2c272000-02-10 17:17:14 +0000889 tm = tm[:-1]
Guido van Rossumc80f1822000-12-15 15:37:48 +0000890 tm = tm.split(':')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000891 if len(tm) == 2:
892 [thh, tmm] = tm
893 tss = '0'
Guido van Rossum99e11311998-12-23 21:58:38 +0000894 elif len(tm) == 3:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000895 [thh, tmm, tss] = tm
Guido van Rossum99e11311998-12-23 21:58:38 +0000896 else:
897 return None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000898 try:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000899 yy = int(yy)
900 dd = int(dd)
901 thh = int(thh)
902 tmm = int(tmm)
903 tss = int(tss)
904 except ValueError:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000905 return None
Guido van Rossumc80f1822000-12-15 15:37:48 +0000906 tzoffset = None
907 tz = tz.upper()
Raymond Hettinger54f02222002-06-01 14:18:47 +0000908 if tz in _timezones:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000909 tzoffset = _timezones[tz]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000910 else:
Tim Peters0c9886d2001-01-15 01:18:21 +0000911 try:
Guido van Rossumc80f1822000-12-15 15:37:48 +0000912 tzoffset = int(tz)
Tim Peters0c9886d2001-01-15 01:18:21 +0000913 except ValueError:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000914 pass
915 # Convert a timezone offset into seconds ; -0500 -> -18000
Guido van Rossuma73033f1998-02-19 00:28:58 +0000916 if tzoffset:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000917 if tzoffset < 0:
918 tzsign = -1
919 tzoffset = -tzoffset
920 else:
921 tzsign = 1
Guido van Rossum54e54c62001-09-04 19:14:14 +0000922 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000923 return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000924
Guido van Rossumb6775db1994-08-01 11:34:53 +0000925
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000926def parsedate(data):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000927 """Convert a time string to a time tuple."""
Guido van Rossumc80f1822000-12-15 15:37:48 +0000928 t = parsedate_tz(data)
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000929 if t is None:
930 return t
931 return t[:9]
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000932
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000933
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000934def mktime_tz(data):
Guido van Rossum67133e21998-05-18 16:09:10 +0000935 """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
Guido van Rossuma73033f1998-02-19 00:28:58 +0000936 if data[9] is None:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000937 # No zone info, so localtime is better assumption than GMT
938 return time.mktime(data[:8] + (-1,))
Guido van Rossuma73033f1998-02-19 00:28:58 +0000939 else:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000940 t = time.mktime(data[:8] + (0,))
941 return t - data[9] - time.timezone
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000942
Guido van Rossum247a78a1999-04-19 18:04:38 +0000943def formatdate(timeval=None):
944 """Returns time format preferred for Internet standards.
945
946 Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000947
948 According to RFC 1123, day and month names must always be in
949 English. If not for that, this code could use strftime(). It
950 can't because strftime() honors the locale and could generated
951 non-English names.
Guido van Rossum247a78a1999-04-19 18:04:38 +0000952 """
953 if timeval is None:
954 timeval = time.time()
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000955 timeval = time.gmtime(timeval)
956 return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000957 ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000958 timeval[2],
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000959 ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
960 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
Tim Peters83e7ccc2001-09-04 06:37:28 +0000961 timeval[0], timeval[3], timeval[4], timeval[5])
Guido van Rossum247a78a1999-04-19 18:04:38 +0000962
Guido van Rossumb6775db1994-08-01 11:34:53 +0000963
964# When used as script, run a small test program.
965# The first command line argument must be a filename containing one
966# message in RFC-822 format.
967
968if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000969 import sys, os
970 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
971 if sys.argv[1:]: file = sys.argv[1]
972 f = open(file, 'r')
973 m = Message(f)
974 print 'From:', m.getaddr('from')
975 print 'To:', m.getaddrlist('to')
976 print 'Subject:', m.getheader('subject')
977 print 'Date:', m.getheader('date')
978 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000979 tz = date[-1]
980 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000981 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000982 print 'ParsedDate:', time.asctime(date),
983 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000984 hhmm, ss = divmod(hhmmss, 60)
985 hh, mm = divmod(hhmm, 60)
986 print "%+03d%02d" % (hh, mm),
987 if ss: print ".%02d" % ss,
988 print
989 else:
990 print 'ParsedDate:', None
991 m.rewindbody()
992 n = 0
993 while f.readline():
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000994 n += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000995 print 'Lines:', n
996 print '-'*70
997 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000998 if 'Date' in m: print 'Date =', m['Date']
999 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +00001000 print 'keys =', m.keys()
1001 print 'values =', m.values()
1002 print 'items =', m.items()