blob: 871a049c2192074a9c6a45d010a6fb568e78c657 [file] [log] [blame]
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00001"""RFC 2822 message manipulation.
Guido van Rossum01ca3361992-07-13 14:28:59 +00002
Barry Warsaw9ec58aa2001-07-16 20:40:35 +00003Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long-awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
Barry Warsawb8a55c02001-07-16 20:41:40 +000012 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000013
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000017
Guido van Rossum9ab94c11997-12-10 16:17:39 +000018 fp = open(file, 'r')
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000019
Guido van Rossumc7bb8571998-06-10 21:31:01 +000020You can use any other legal way of getting an open file object, e.g. use
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000021sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
Guido van Rossum9ab94c11997-12-10 16:17:39 +000024 m = Message(fp)
25
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000026This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000032
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.
Guido van Rossumc7bb8571998-06-10 21:31:01 +000041
Guido van Rossum9ab94c11997-12-10 16:17:39 +000042To get the text of a particular header there are several methods:
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000043
Guido van Rossum9ab94c11997-12-10 16:17:39 +000044 str = m.getheader(name)
45 str = m.getrawheader(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000046
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
Guido van Rossum9ab94c11997-12-10 16:17:39 +000052
53For addresses and address lists there are functions
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000054
55 realname, mailaddress = m.getaddr(name)
Guido van Rossum9ab94c11997-12-10 16:17:39 +000056 list = m.getaddrlist(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000057
Guido van Rossum9ab94c11997-12-10 16:17:39 +000058where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000061
Guido van Rossum9ab94c11997-12-10 16:17:39 +000062 time = m.getdate(name)
Barry Warsaw9ec58aa2001-07-16 20:40:35 +000063
Guido van Rossum9ab94c11997-12-10 16:17:39 +000064which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
Guido van Rossum4d4ab921998-06-16 22:27:09 +000072# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
Guido van Rossum01ca3361992-07-13 14:28:59 +000073
Guido van Rossumb6775db1994-08-01 11:34:53 +000074import time
Guido van Rossum01ca3361992-07-13 14:28:59 +000075
# Names exported by a star-import of this module.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() treats as the blank line ending
# the headers (CRLF and bare LF).
_blanklines = ('\r\n', '\n')            # Optimization for islast()
Guido van Rossum92457b91995-06-22 19:06:57 +000079
80
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  Afterwards they are available in two forms:

    self.headers -- the raw header lines, in order, exactly as read.
    self.dict    -- a mapping from lower-cased header name to the stripped
                    value of the *last* header with that name.

    The instance also supports dictionary-style access (msg['subject'],
    'to' in msg, len(msg), ...) backed by self.dict.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  seekable says whether
        fp supports tell()/seek(): when it is exactly 1 (the default) this
        is probed by calling fp.tell() once; 0 disables seeking; any other
        true value is trusted without the probe.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick the push-back strategy once: an unread() method wins over
        # tell()/seek(); with neither, a bad line cannot be given back.
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                # Remember where this line starts so it can be un-read.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom += line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        The default returns the lower-cased text before the first colon, or
        None when the line has no colon or starts with one.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default never treats a line as a comment.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; stop collecting.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line; continuation lines are kept.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers.
                    raw.append(', ')
                # Every non-continuation line returned above starts with
                # "<name>:", so the text after the first colon is the value.
                raw.append(h[h.find(':') + 1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is
        missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so the remaining indices stay valid.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it if it is missing.

        When the header is absent it is appended to the raw header list and
        stored with the value `default', which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined into a single string."""
        return ''.join(self.headers)
Guido van Rossum01ca3361992-07-13 14:28:59 +0000459
460
461# Utility functions
462# -----------------
463
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000464# XXX Should fix unquote() and quote() to be really conformant.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000465# XXX The inverses of the parse functions may also be useful.
466
Guido van Rossum01ca3361992-07-13 14:28:59 +0000467
def unquote(s):
    """Strip one enclosing pair of double quotes or angle brackets.

    For a double-quoted string the escapes \\\\ and \\" inside it are
    turned back into a backslash and a double quote.  A string enclosed
    in <...> just loses the brackets.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(s) <= 1:
        return s
    if s.startswith('"') and s.endswith('"'):
        inner = s[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return s[1:-1]
    return s
Guido van Rossumb6775db1994-08-01 11:34:53 +0000476
477
def quote(s):
    """Escape a string for use inside a double-quoted string.

    Each backslash becomes two backslashes and each double quote gets a
    backslash in front of it.  The surrounding quote characters themselves
    are not added.
    """
    with_escaped_backslashes = s.replace('\\', '\\\\')
    return with_escaped_backslashes.replace('"', '\\"')
Guido van Rossumb6775db1994-08-01 11:34:53 +0000481
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000482
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when none is
    found the result is (None, None).
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000490
491
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000492class AddrlistClass:
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000493 """Address parser class by Ben Escoto.
Tim Peters0c9886d2001-01-15 01:18:21 +0000494
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000495 To understand what this class does, it helps to have a copy of
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000496 RFC 2822 in front of you.
497
498 http://www.faqs.org/rfcs/rfc2822.html
Guido van Rossum4d4ab921998-06-16 22:27:09 +0000499
500 Note: this class interface is deprecated and may be removed in the future.
501 Use rfc822.AddressList instead.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000502 """
Tim Peters0c9886d2001-01-15 01:18:21 +0000503
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000504 def __init__(self, field):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000505 """Initialize a new instance.
Tim Peters0c9886d2001-01-15 01:18:21 +0000506
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000507 `field' is an unparsed address header field, containing one or more
508 addresses.
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000509 """
510 self.specials = '()<>@,:;.\"[]'
511 self.pos = 0
512 self.LWS = ' \t'
Barry Warsaw8a578431999-01-14 19:59:58 +0000513 self.CR = '\r\n'
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000514 self.atomends = self.specials + self.LWS + self.CR
Barry Warsaw9ec58aa2001-07-16 20:40:35 +0000515 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
516 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
517 # syntax, so allow dots in phrases.
518 self.phraseends = self.atomends.replace('.', '')
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000519 self.field = field
520 self.commentlist = []
Tim Peters0c9886d2001-01-15 01:18:21 +0000521
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000522 def gotonext(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000523 """Parse up to the start of the next address."""
524 while self.pos < len(self.field):
525 if self.field[self.pos] in self.LWS + '\n\r':
526 self.pos = self.pos + 1
527 elif self.field[self.pos] == '(':
528 self.commentlist.append(self.getcomment())
529 else: break
Tim Peters0c9886d2001-01-15 01:18:21 +0000530
Guido van Rossumbe7c45e1997-11-22 21:49:19 +0000531 def getaddrlist(self):
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000532 """Parse all addresses.
Tim Peters0c9886d2001-01-15 01:18:21 +0000533
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000534 Returns a list containing all of the addresses.
535 """
Barry Warsawf1fd2822001-11-13 21:30:37 +0000536 result = []
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000537 ad = self.getaddress()
538 while ad:
539 result += ad
Barry Warsawf1fd2822001-11-13 21:30:37 +0000540 ad = self.getaddress()
Barry Warsawf1fd2822001-11-13 21:30:37 +0000541 return result
Tim Peters0c9886d2001-01-15 01:18:21 +0000542
    def getaddress(self):
        """Parse the next address.

        Returns a list of (text, address) tuples: usually one tuple, several
        for a group, or an empty list when nothing could be parsed.  The
        first element of each tuple is built from the phrase and/or the
        comments collected while parsing.  A trailing comma after the
        address is consumed.
        """
        self.commentlist = []
        self.gotonext()

        # Remember where the address starts so the addr-spec branch below
        # can re-parse from here.  Note that oldcl is the same list object
        # as self.commentlist, not a copy.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        # The character following the phrase decides what kind of address
        # this is.
        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            # Collect member addresses recursively until the closing ';'
            # or the end of the field.
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            # Comments, if any, are appended to the phrase in parentheses.
            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special character.
                self.pos += 1

        # Swallow the comma separating this address from the next one.
        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist
Tim Peters0c9886d2001-01-15 01:18:21 +0000600
def getrouteaddr(self):
    """Parse a route address (Return-path value).

    The route portion ("@domain," hops and the ':' separator) is consumed
    and thrown away; only the addr-spec is returned.  Returns None when
    the parser is not positioned on '<', and the empty string when no
    addr-spec is found before '>' or the end of the field.
    """
    if self.field[self.pos] != '<':
        return

    # Step over the '<' and any whitespace/comments that follow it.
    self.pos += 1
    self.gotonext()

    addrspec = ""
    in_route = False
    while self.pos < len(self.field):
        if in_route:
            # The previous character was '@': swallow the route domain.
            self.getdomain()
            in_route = False
        else:
            ch = self.field[self.pos]
            if ch == '>':
                self.pos += 1
                return addrspec
            if ch == '@':
                self.pos += 1
                in_route = True
            elif ch == ':':
                self.pos += 1
            else:
                addrspec = self.getaddrspec()
                # Advance one position past where the addr-spec stopped
                # (normally the closing '>').
                self.pos += 1
                return addrspec
        self.gotonext()

    return addrspec
Tim Peters0c9886d2001-01-15 01:18:21 +0000632
def getaddrspec(self):
    """Parse an RFC 2822 addr-spec.

    Collects the local part (atoms, dots and quoted strings) and, if an
    '@' follows, appends '@' plus whatever getdomain() returns.
    """
    parts = []

    self.gotonext()
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch == '.':
            parts.append('.')
            self.pos += 1
        elif ch == '"':
            # Quoted strings keep their surrounding quotes in the result.
            parts.append('"%s"' % self.getquote())
        elif ch in self.atomends:
            break
        else:
            parts.append(self.getatom())
        self.gotonext()

    localpart = ''.join(parts)
    if self.pos < len(self.field) and self.field[self.pos] == '@':
        self.pos += 1
        self.gotonext()
        return localpart + '@' + self.getdomain()

    # No '@' here: the local part is all there is.
    return localpart
Tim Peters0c9886d2001-01-15 01:18:21 +0000656
def getdomain(self):
    """Get the complete domain name from an address.

    Whitespace is skipped, comments are moved to self.commentlist, and
    atoms, dots and domain literals are concatenated.  Scanning stops at
    the first remaining atom-ending character or the end of the field.
    """
    pieces = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            pieces.append(self.getdomainliteral())
        elif ch == '.':
            self.pos += 1
            pieces.append('.')
        elif ch in self.atomends:
            break
        else:
            pieces.append(self.getatom())
    return ''.join(pieces)
Tim Peters0c9886d2001-01-15 01:18:21 +0000674
def getdelimited(self, beginchar, endchars, allowcomments = 1):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.  If self is not
    looking at an instance of `beginchar' then getdelimited returns the
    empty string.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered; the end character is
    consumed but not included in the result.

    If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
    within the parsed fragment; their text (without the parentheses) is
    spliced into the result.

    A backslash quotes the following character, which is then copied
    literally even if it is an end character.
    """
    if self.field[self.pos] != beginchar:
        return ''

    slist = ['']
    quote = 0
    self.pos += 1
    while self.pos < len(self.field):
        if quote == 1:
            slist.append(self.field[self.pos])
            quote = 0
        elif self.field[self.pos] in endchars:
            self.pos += 1
            break
        elif allowcomments and self.field[self.pos] == '(':
            slist.append(self.getcomment())
            # getcomment() has already moved pos past the nested comment;
            # falling through to the increment below would skip a character
            # (possibly our own closing delimiter).
            continue
        elif self.field[self.pos] == '\\':
            quote = 1
        else:
            slist.append(self.field[self.pos])
        self.pos += 1

    return ''.join(slist)
Tim Peters0c9886d2001-01-15 01:18:21 +0000710
def getquote(self):
    """Get a quote-delimited fragment from self's field.

    The fragment starts at '"' and ends at the next unescaped '"' or
    carriage return; embedded comments are not recognized.
    """
    opener = '"'
    closers = '"\r'
    return self.getdelimited(opener, closers, 0)
Tim Peters0c9886d2001-01-15 01:18:21 +0000714
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    The fragment starts at '(' and ends at ')' or carriage return;
    nested comments are allowed.
    """
    opener = '('
    closers = ')\r'
    return self.getdelimited(opener, closers, 1)
Tim Peters0c9886d2001-01-15 01:18:21 +0000718
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal.

    Returns the literal's contents wrapped in square brackets again.
    """
    inner = self.getdelimited('[', ']\r', 0)
    return '[' + inner + ']'
Tim Peters0c9886d2001-01-15 01:18:21 +0000722
def getatom(self, atomends=None):
    """Parse an RFC 2822 atom.

    Consumes characters from the current position up to (not including)
    the first end-of-atom character and returns them as a string.

    Optional atomends specifies a different set of end token delimiters
    (the default is to use self.atomends).  This is used e.g. in
    getphraselist() since phrase endings must not include the `.' (which
    is legal in phrases)."""
    if atomends is None:
        atomends = self.atomends

    text = self.field
    start = self.pos
    end = start
    limit = len(text)
    # Find the end of the atom, then take it in one slice.
    while end < limit and text[end] not in atomends:
        end += 1

    self.pos = end
    return text[start:end]
Tim Peters0c9886d2001-01-15 01:18:21 +0000741
def getphraselist(self):
    """Parse a sequence of RFC 2822 phrases.

    A phrase is a sequence of words, which are in turn either RFC 2822
    atoms or quoted-strings.  Whitespace between words is discarded, so
    the caller can rejoin the returned list of words with single spaces.
    Comments met along the way are appended to self.commentlist.
    """
    words = []

    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            self.pos += 1
            continue
        if ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            self.commentlist.append(self.getcomment())
        elif ch in self.phraseends:
            break
        else:
            # `.' is legal inside a phrase, hence the special end set.
            words.append(self.getatom(self.phraseends))

    return words
Guido van Rossumb6775db1994-08-01 11:34:53 +0000764
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed entries live in self.addresslist.  The +, -, += and -=
    operators treat two AddressLists as sets of those entries.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field yields an empty list without parsing.
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        # Set union: our entries, then those of other we do not have.
        result = AddressList(None)
        extra = [x for x in other.addresslist if x not in self.addresslist]
        result.addresslist = self.addresslist + extra
        return result

    def __iadd__(self, other):
        # Set union, in-place
        mine = self.addresslist
        for x in other.addresslist:
            if x not in mine:
                mine.append(x)
        return self

    def __sub__(self, other):
        # Set difference: our entries that other does not have.
        result = AddressList(None)
        result.addresslist = [x for x in self.addresslist
                              if x not in other.addresslist]
        return result

    def __isub__(self, other):
        # Set difference, in-place: drop one occurrence per entry of other.
        for x in other.addresslist:
            try:
                self.addresslist.remove(x)
            except ValueError:
                pass
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Guido van Rossum81d10b41998-06-16 22:29:03 +0000814
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is just the address.
    """
    name = pair[0]
    if not name:
        return pair[1]
    return ''.join(['"', name, '" <', pair[1], '>'])
Guido van Rossumb6775db1994-08-01 11:34:53 +0000821
822# Parse a date field
823
# Month names are looked up by position: entries 0-11 are the abbreviations
# and entries 12-23 the full names, so an index above 11 is folded back by 12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset), or None if the string cannot be parsed.  tzoffset is the
    zone's offset from UTC in seconds (-0500 -> -18000), or None when the
    zone is missing or not recognized.  Named zones are limited to the
    entries of _timezones; of the military zones only Z is known.

    The year is returned exactly as written (two-digit years are not
    expanded) and field values are not range-checked.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to look at.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time: 19:12:08+0100 / 19:12:08-0500
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone; int() accepts both '+' and '-'.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Month and day may be written in the other order ("Nov 20").
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith() and the [:1] slice below are used instead of indexing so
    # that an empty field falls through to the int() check rather than
    # raising IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Time and year are swapped: "15:58:26 1997"
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are swapped
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave the offset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000923
Guido van Rossumb6775db1994-08-01 11:34:53 +0000924
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of parsedate_tz()'s result (i.e. without
    the timezone offset), or None if parsedate_tz() returns None.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000931
Guido van Rossum27cb8a41996-11-20 22:12:26 +0000932
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With no zone info the
    fields are interpreted as local time via time.mktime().  Otherwise the
    fields are converted as if they were UTC and the offset is subtracted;
    this path does not consult the host's local timezone settings.

    The result is a float in both cases.
    """
    import calendar

    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # calendar.timegm() is the UTC inverse of time.gmtime() and only looks
    # at the first six fields, so the answer no longer depends on
    # time.timezone or on how mktime() treats the local zone.
    return float(calendar.timegm(data[:6])) - data[9]
Guido van Rossum6cdd7a01996-12-12 18:39:54 +0000941
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds as accepted by time.gmtime(); the
    current time is used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = \
        time.gmtime(timeval)[:7]
    dayname = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[weekday]
    monthname = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        dayname, day, monthname, year, hour, minute, second)
Guido van Rossum247a78a1999-04-19 18:04:38 +0000961
Guido van Rossumb6775db1994-08-01 11:34:53 +0000962
963# When used as script, run a small test program.
964# The first command line argument must be a filename containing one
965# message in RFC-822 format.
966
967if __name__ == '__main__':
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000968 import sys, os
969 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
970 if sys.argv[1:]: file = sys.argv[1]
971 f = open(file, 'r')
972 m = Message(f)
973 print 'From:', m.getaddr('from')
974 print 'To:', m.getaddrlist('to')
975 print 'Subject:', m.getheader('subject')
976 print 'Date:', m.getheader('date')
977 date = m.getdate_tz('date')
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000978 tz = date[-1]
979 date = time.localtime(mktime_tz(date))
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000980 if date:
Guido van Rossum1d2b23e2000-01-17 14:11:04 +0000981 print 'ParsedDate:', time.asctime(date),
982 hhmmss = tz
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000983 hhmm, ss = divmod(hhmmss, 60)
984 hh, mm = divmod(hhmm, 60)
985 print "%+03d%02d" % (hh, mm),
986 if ss: print ".%02d" % ss,
987 print
988 else:
989 print 'ParsedDate:', None
990 m.rewindbody()
991 n = 0
992 while f.readline():
Raymond Hettingerbb5fbc42005-02-08 08:05:13 +0000993 n += 1
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000994 print 'Lines:', n
995 print '-'*70
996 print 'len =', len(m)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000997 if 'Date' in m: print 'Date =', m['Date']
998 if 'X-Nonsense' in m: pass
Guido van Rossum9ab94c11997-12-10 16:17:39 +0000999 print 'keys =', m.keys()
1000 print 'values =', m.values()
1001 print 'items =', m.items()