| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 1 | # Copyright (C) 2002 Python Software Foundation | 
 | 2 |  | 
 | 3 | """Email address parsing code. | 
 | 4 |  | 
 | 5 | Lifted directly from rfc822.py.  This should eventually be rewritten. | 
 | 6 | """ | 
 | 7 |  | 
 | 8 | import time | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 9 | from types import TupleType | 
 | 10 |  | 
# Compatibility shim: if the names True and False are not already defined
# (looking them up raises NameError), bind them to the integers 1 and 0.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Shared string constants (a single space, the empty string, and
# comma-plus-space), convenient as str.join() separators.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 20 |  | 
 | 21 | # Parse a date field | 
 | 22 | _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', | 
 | 23 |                'aug', 'sep', 'oct', 'nov', 'dec', | 
 | 24 |                'january', 'february', 'march', 'april', 'may', 'june', 'july', | 
 | 25 |                'august', 'september', 'october', 'november', 'december'] | 
 | 26 |  | 
 | 27 | _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] | 
 | 28 |  | 
 | 29 | # The timezone table does not include the military time zones defined | 
 | 30 | # in RFC822, other than Z.  According to RFC1123, the description in | 
 | 31 | # RFC822 gets the signs wrong, so we can't rely on any such time | 
 | 32 | # zones.  RFC1123 recommends that numeric timezone indicators be used | 
 | 33 | # instead of timezone names. | 
 | 34 |  | 
 | 35 | _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, | 
 | 36 |               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada) | 
 | 37 |               'EST': -500, 'EDT': -400,  # Eastern | 
 | 38 |               'CST': -600, 'CDT': -500,  # Central | 
 | 39 |               'MST': -700, 'MDT': -600,  # Mountain | 
 | 40 |               'PST': -800, 'PDT': -700   # Pacific | 
 | 41 |               } | 
 | 42 |  | 
 | 43 |  | 
 | 44 | def parsedate_tz(data): | 
 | 45 |     """Convert a date string to a time tuple. | 
 | 46 |  | 
 | 47 |     Accounts for military timezones. | 
 | 48 |     """ | 
 | 49 |     data = data.split() | 
| Barry Warsaw | ba97659 | 2002-12-30 17:21:36 +0000 | [diff] [blame] | 50 |     # The FWS after the comma after the day-of-week is optional, so search and | 
 | 51 |     # adjust for this. | 
 | 52 |     if data[0].endswith(',') or data[0].lower() in _daynames: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 53 |         # There's a dayname here. Skip it | 
 | 54 |         del data[0] | 
| Barry Warsaw | ba97659 | 2002-12-30 17:21:36 +0000 | [diff] [blame] | 55 |     else: | 
 | 56 |         i = data[0].rfind(',') | 
 | 57 |         if i < 0: | 
 | 58 |             return None | 
 | 59 |         data[0] = data[0][i+1:] | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 60 |     if len(data) == 3: # RFC 850 date, deprecated | 
 | 61 |         stuff = data[0].split('-') | 
 | 62 |         if len(stuff) == 3: | 
 | 63 |             data = stuff + data[1:] | 
 | 64 |     if len(data) == 4: | 
 | 65 |         s = data[3] | 
 | 66 |         i = s.find('+') | 
 | 67 |         if i > 0: | 
 | 68 |             data[3:] = [s[:i], s[i+1:]] | 
 | 69 |         else: | 
 | 70 |             data.append('') # Dummy tz | 
 | 71 |     if len(data) < 5: | 
 | 72 |         return None | 
 | 73 |     data = data[:5] | 
 | 74 |     [dd, mm, yy, tm, tz] = data | 
 | 75 |     mm = mm.lower() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 76 |     if mm not in _monthnames: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 77 |         dd, mm = mm, dd.lower() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 78 |         if mm not in _monthnames: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 79 |             return None | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 80 |     mm = _monthnames.index(mm) + 1 | 
 | 81 |     if mm > 12: | 
 | 82 |         mm -= 12 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 83 |     if dd[-1] == ',': | 
 | 84 |         dd = dd[:-1] | 
 | 85 |     i = yy.find(':') | 
 | 86 |     if i > 0: | 
 | 87 |         yy, tm = tm, yy | 
 | 88 |     if yy[-1] == ',': | 
 | 89 |         yy = yy[:-1] | 
 | 90 |     if not yy[0].isdigit(): | 
 | 91 |         yy, tz = tz, yy | 
 | 92 |     if tm[-1] == ',': | 
 | 93 |         tm = tm[:-1] | 
 | 94 |     tm = tm.split(':') | 
 | 95 |     if len(tm) == 2: | 
 | 96 |         [thh, tmm] = tm | 
 | 97 |         tss = '0' | 
 | 98 |     elif len(tm) == 3: | 
 | 99 |         [thh, tmm, tss] = tm | 
 | 100 |     else: | 
 | 101 |         return None | 
 | 102 |     try: | 
 | 103 |         yy = int(yy) | 
 | 104 |         dd = int(dd) | 
 | 105 |         thh = int(thh) | 
 | 106 |         tmm = int(tmm) | 
 | 107 |         tss = int(tss) | 
 | 108 |     except ValueError: | 
 | 109 |         return None | 
 | 110 |     tzoffset = None | 
 | 111 |     tz = tz.upper() | 
 | 112 |     if _timezones.has_key(tz): | 
 | 113 |         tzoffset = _timezones[tz] | 
 | 114 |     else: | 
 | 115 |         try: | 
 | 116 |             tzoffset = int(tz) | 
 | 117 |         except ValueError: | 
 | 118 |             pass | 
 | 119 |     # Convert a timezone offset into seconds ; -0500 -> -18000 | 
 | 120 |     if tzoffset: | 
 | 121 |         if tzoffset < 0: | 
 | 122 |             tzsign = -1 | 
 | 123 |             tzoffset = -tzoffset | 
 | 124 |         else: | 
 | 125 |             tzsign = 1 | 
 | 126 |         tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) | 
 | 127 |     tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset) | 
 | 128 |     return tuple | 
 | 129 |  | 
 | 130 |  | 
 | 131 | def parsedate(data): | 
 | 132 |     """Convert a time string to a time tuple.""" | 
 | 133 |     t = parsedate_tz(data) | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 134 |     if isinstance(t, TupleType): | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 135 |         return t[:9] | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 136 |     else: | 
 | 137 |         return t | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 138 |  | 
 | 139 |  | 
 | 140 | def mktime_tz(data): | 
 | 141 |     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.""" | 
 | 142 |     if data[9] is None: | 
 | 143 |         # No zone info, so localtime is better assumption than GMT | 
 | 144 |         return time.mktime(data[:8] + (-1,)) | 
 | 145 |     else: | 
 | 146 |         t = time.mktime(data[:8] + (0,)) | 
 | 147 |         return t - data[9] - time.timezone | 
 | 148 |  | 
 | 149 |  | 
 | 150 | def quote(str): | 
 | 151 |     """Add quotes around a string.""" | 
 | 152 |     return str.replace('\\', '\\\\').replace('"', '\\"') | 
 | 153 |  | 
 | 154 |  | 
 | 155 | class AddrlistClass: | 
 | 156 |     """Address parser class by Ben Escoto. | 
 | 157 |  | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 158 |     To understand what this class does, it helps to have a copy of RFC 2822 in | 
 | 159 |     front of you. | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 160 |  | 
 | 161 |     Note: this class interface is deprecated and may be removed in the future. | 
 | 162 |     Use rfc822.AddressList instead. | 
 | 163 |     """ | 
 | 164 |  | 
 | 165 |     def __init__(self, field): | 
 | 166 |         """Initialize a new instance. | 
 | 167 |  | 
 | 168 |         `field' is an unparsed address header field, containing | 
 | 169 |         one or more addresses. | 
 | 170 |         """ | 
 | 171 |         self.specials = '()<>@,:;.\"[]' | 
 | 172 |         self.pos = 0 | 
 | 173 |         self.LWS = ' \t' | 
 | 174 |         self.CR = '\r\n' | 
 | 175 |         self.atomends = self.specials + self.LWS + self.CR | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 176 |         # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it | 
 | 177 |         # is obsolete syntax.  RFC 2822 requires that we recognize obsolete | 
 | 178 |         # syntax, so allow dots in phrases. | 
 | 179 |         self.phraseends = self.atomends.replace('.', '') | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 180 |         self.field = field | 
 | 181 |         self.commentlist = [] | 
 | 182 |  | 
 | 183 |     def gotonext(self): | 
 | 184 |         """Parse up to the start of the next address.""" | 
 | 185 |         while self.pos < len(self.field): | 
 | 186 |             if self.field[self.pos] in self.LWS + '\n\r': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 187 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 188 |             elif self.field[self.pos] == '(': | 
 | 189 |                 self.commentlist.append(self.getcomment()) | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 190 |             else: | 
 | 191 |                 break | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 192 |  | 
 | 193 |     def getaddrlist(self): | 
 | 194 |         """Parse all addresses. | 
 | 195 |  | 
 | 196 |         Returns a list containing all of the addresses. | 
 | 197 |         """ | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 198 |         result = [] | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 199 |         while True: | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 200 |             ad = self.getaddress() | 
 | 201 |             if ad: | 
 | 202 |                 result += ad | 
 | 203 |             else: | 
 | 204 |                 break | 
 | 205 |         return result | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 206 |  | 
 | 207 |     def getaddress(self): | 
 | 208 |         """Parse the next address.""" | 
 | 209 |         self.commentlist = [] | 
 | 210 |         self.gotonext() | 
 | 211 |  | 
 | 212 |         oldpos = self.pos | 
 | 213 |         oldcl = self.commentlist | 
 | 214 |         plist = self.getphraselist() | 
 | 215 |  | 
 | 216 |         self.gotonext() | 
 | 217 |         returnlist = [] | 
 | 218 |  | 
 | 219 |         if self.pos >= len(self.field): | 
 | 220 |             # Bad email address technically, no domain. | 
 | 221 |             if plist: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 222 |                 returnlist = [(SPACE.join(self.commentlist), plist[0])] | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 223 |  | 
 | 224 |         elif self.field[self.pos] in '.@': | 
 | 225 |             # email address is just an addrspec | 
 | 226 |             # this isn't very efficient since we start over | 
 | 227 |             self.pos = oldpos | 
 | 228 |             self.commentlist = oldcl | 
 | 229 |             addrspec = self.getaddrspec() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 230 |             returnlist = [(SPACE.join(self.commentlist), addrspec)] | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 231 |  | 
 | 232 |         elif self.field[self.pos] == ':': | 
 | 233 |             # address is a group | 
 | 234 |             returnlist = [] | 
 | 235 |  | 
 | 236 |             fieldlen = len(self.field) | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 237 |             self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 238 |             while self.pos < len(self.field): | 
 | 239 |                 self.gotonext() | 
 | 240 |                 if self.pos < fieldlen and self.field[self.pos] == ';': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 241 |                     self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 242 |                     break | 
 | 243 |                 returnlist = returnlist + self.getaddress() | 
 | 244 |  | 
 | 245 |         elif self.field[self.pos] == '<': | 
 | 246 |             # Address is a phrase then a route addr | 
 | 247 |             routeaddr = self.getrouteaddr() | 
 | 248 |  | 
 | 249 |             if self.commentlist: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 250 |                 returnlist = [(SPACE.join(plist) + ' (' + | 
 | 251 |                                ' '.join(self.commentlist) + ')', routeaddr)] | 
 | 252 |             else: | 
 | 253 |                 returnlist = [(SPACE.join(plist), routeaddr)] | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 254 |  | 
 | 255 |         else: | 
 | 256 |             if plist: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 257 |                 returnlist = [(SPACE.join(self.commentlist), plist[0])] | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 258 |             elif self.field[self.pos] in self.specials: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 259 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 260 |  | 
 | 261 |         self.gotonext() | 
 | 262 |         if self.pos < len(self.field) and self.field[self.pos] == ',': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 263 |             self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 264 |         return returnlist | 
 | 265 |  | 
 | 266 |     def getrouteaddr(self): | 
 | 267 |         """Parse a route address (Return-path value). | 
 | 268 |  | 
 | 269 |         This method just skips all the route stuff and returns the addrspec. | 
 | 270 |         """ | 
 | 271 |         if self.field[self.pos] != '<': | 
 | 272 |             return | 
 | 273 |  | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 274 |         expectroute = False | 
 | 275 |         self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 276 |         self.gotonext() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 277 |         adlist = '' | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 278 |         while self.pos < len(self.field): | 
 | 279 |             if expectroute: | 
 | 280 |                 self.getdomain() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 281 |                 expectroute = False | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 282 |             elif self.field[self.pos] == '>': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 283 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 284 |                 break | 
 | 285 |             elif self.field[self.pos] == '@': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 286 |                 self.pos += 1 | 
 | 287 |                 expectroute = True | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 288 |             elif self.field[self.pos] == ':': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 289 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 290 |             else: | 
 | 291 |                 adlist = self.getaddrspec() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 292 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 293 |                 break | 
 | 294 |             self.gotonext() | 
 | 295 |  | 
 | 296 |         return adlist | 
 | 297 |  | 
 | 298 |     def getaddrspec(self): | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 299 |         """Parse an RFC 2822 addr-spec.""" | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 300 |         aslist = [] | 
 | 301 |  | 
 | 302 |         self.gotonext() | 
 | 303 |         while self.pos < len(self.field): | 
 | 304 |             if self.field[self.pos] == '.': | 
 | 305 |                 aslist.append('.') | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 306 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 307 |             elif self.field[self.pos] == '"': | 
 | 308 |                 aslist.append('"%s"' % self.getquote()) | 
 | 309 |             elif self.field[self.pos] in self.atomends: | 
 | 310 |                 break | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 311 |             else: | 
 | 312 |                 aslist.append(self.getatom()) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 313 |             self.gotonext() | 
 | 314 |  | 
 | 315 |         if self.pos >= len(self.field) or self.field[self.pos] != '@': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 316 |             return EMPTYSTRING.join(aslist) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 317 |  | 
 | 318 |         aslist.append('@') | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 319 |         self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 320 |         self.gotonext() | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 321 |         return EMPTYSTRING.join(aslist) + self.getdomain() | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 322 |  | 
 | 323 |     def getdomain(self): | 
 | 324 |         """Get the complete domain name from an address.""" | 
 | 325 |         sdlist = [] | 
 | 326 |         while self.pos < len(self.field): | 
 | 327 |             if self.field[self.pos] in self.LWS: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 328 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 329 |             elif self.field[self.pos] == '(': | 
 | 330 |                 self.commentlist.append(self.getcomment()) | 
 | 331 |             elif self.field[self.pos] == '[': | 
 | 332 |                 sdlist.append(self.getdomainliteral()) | 
 | 333 |             elif self.field[self.pos] == '.': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 334 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 335 |                 sdlist.append('.') | 
 | 336 |             elif self.field[self.pos] in self.atomends: | 
 | 337 |                 break | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 338 |             else: | 
 | 339 |                 sdlist.append(self.getatom()) | 
 | 340 |         return EMPTYSTRING.join(sdlist) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 341 |  | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 342 |     def getdelimited(self, beginchar, endchars, allowcomments=True): | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 343 |         """Parse a header fragment delimited by special characters. | 
 | 344 |  | 
 | 345 |         `beginchar' is the start character for the fragment. | 
 | 346 |         If self is not looking at an instance of `beginchar' then | 
 | 347 |         getdelimited returns the empty string. | 
 | 348 |  | 
 | 349 |         `endchars' is a sequence of allowable end-delimiting characters. | 
 | 350 |         Parsing stops when one of these is encountered. | 
 | 351 |  | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 352 |         If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed | 
 | 353 |         within the parsed fragment. | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 354 |         """ | 
 | 355 |         if self.field[self.pos] != beginchar: | 
 | 356 |             return '' | 
 | 357 |  | 
 | 358 |         slist = [''] | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 359 |         quote = False | 
 | 360 |         self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 361 |         while self.pos < len(self.field): | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 362 |             if quote: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 363 |                 slist.append(self.field[self.pos]) | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 364 |                 quote = False | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 365 |             elif self.field[self.pos] in endchars: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 366 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 367 |                 break | 
 | 368 |             elif allowcomments and self.field[self.pos] == '(': | 
 | 369 |                 slist.append(self.getcomment()) | 
 | 370 |             elif self.field[self.pos] == '\\': | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 371 |                 quote = True | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 372 |             else: | 
 | 373 |                 slist.append(self.field[self.pos]) | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 374 |             self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 375 |  | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 376 |         return EMPTYSTRING.join(slist) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 377 |  | 
 | 378 |     def getquote(self): | 
 | 379 |         """Get a quote-delimited fragment from self's field.""" | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 380 |         return self.getdelimited('"', '"\r', False) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 381 |  | 
 | 382 |     def getcomment(self): | 
 | 383 |         """Get a parenthesis-delimited fragment from self's field.""" | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 384 |         return self.getdelimited('(', ')\r', True) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 385 |  | 
 | 386 |     def getdomainliteral(self): | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 387 |         """Parse an RFC 2822 domain-literal.""" | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 388 |         return '[%s]' % self.getdelimited('[', ']\r', False) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 389 |  | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 390 |     def getatom(self, atomends=None): | 
 | 391 |         """Parse an RFC 2822 atom. | 
 | 392 |  | 
 | 393 |         Optional atomends specifies a different set of end token delimiters | 
 | 394 |         (the default is to use self.atomends).  This is used e.g. in | 
 | 395 |         getphraselist() since phrase endings must not include the `.' (which | 
 | 396 |         is legal in phrases).""" | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 397 |         atomlist = [''] | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 398 |         if atomends is None: | 
 | 399 |             atomends = self.atomends | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 400 |  | 
 | 401 |         while self.pos < len(self.field): | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 402 |             if self.field[self.pos] in atomends: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 403 |                 break | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 404 |             else: | 
 | 405 |                 atomlist.append(self.field[self.pos]) | 
 | 406 |             self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 407 |  | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 408 |         return EMPTYSTRING.join(atomlist) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 409 |  | 
 | 410 |     def getphraselist(self): | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 411 |         """Parse a sequence of RFC 2822 phrases. | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 412 |  | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 413 |         A phrase is a sequence of words, which are in turn either RFC 2822 | 
 | 414 |         atoms or quoted-strings.  Phrases are canonicalized by squeezing all | 
 | 415 |         runs of continuous whitespace into one space. | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 416 |         """ | 
 | 417 |         plist = [] | 
 | 418 |  | 
 | 419 |         while self.pos < len(self.field): | 
 | 420 |             if self.field[self.pos] in self.LWS: | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 421 |                 self.pos += 1 | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 422 |             elif self.field[self.pos] == '"': | 
 | 423 |                 plist.append(self.getquote()) | 
 | 424 |             elif self.field[self.pos] == '(': | 
 | 425 |                 self.commentlist.append(self.getcomment()) | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 426 |             elif self.field[self.pos] in self.phraseends: | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 427 |                 break | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 428 |             else: | 
 | 429 |                 plist.append(self.getatom(self.phraseends)) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 430 |  | 
 | 431 |         return plist | 
 | 432 |  | 
 | 433 | class AddressList(AddrlistClass): | 
| Barry Warsaw | 1fb22bb | 2002-12-30 16:21:07 +0000 | [diff] [blame] | 434 |     """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 435 |     def __init__(self, field): | 
 | 436 |         AddrlistClass.__init__(self, field) | 
 | 437 |         if field: | 
 | 438 |             self.addresslist = self.getaddrlist() | 
 | 439 |         else: | 
 | 440 |             self.addresslist = [] | 
 | 441 |  | 
 | 442 |     def __len__(self): | 
 | 443 |         return len(self.addresslist) | 
 | 444 |  | 
 | 445 |     def __str__(self): | 
| Barry Warsaw | 5c8fef9 | 2002-12-30 16:43:42 +0000 | [diff] [blame] | 446 |         return COMMASPACE.join(map(dump_address_pair, self.addresslist)) | 
| Barry Warsaw | 030ddf7 | 2002-11-05 19:54:52 +0000 | [diff] [blame] | 447 |  | 
 | 448 |     def __add__(self, other): | 
 | 449 |         # Set union | 
 | 450 |         newaddr = AddressList(None) | 
 | 451 |         newaddr.addresslist = self.addresslist[:] | 
 | 452 |         for x in other.addresslist: | 
 | 453 |             if not x in self.addresslist: | 
 | 454 |                 newaddr.addresslist.append(x) | 
 | 455 |         return newaddr | 
 | 456 |  | 
 | 457 |     def __iadd__(self, other): | 
 | 458 |         # Set union, in-place | 
 | 459 |         for x in other.addresslist: | 
 | 460 |             if not x in self.addresslist: | 
 | 461 |                 self.addresslist.append(x) | 
 | 462 |         return self | 
 | 463 |  | 
 | 464 |     def __sub__(self, other): | 
 | 465 |         # Set difference | 
 | 466 |         newaddr = AddressList(None) | 
 | 467 |         for x in self.addresslist: | 
 | 468 |             if not x in other.addresslist: | 
 | 469 |                 newaddr.addresslist.append(x) | 
 | 470 |         return newaddr | 
 | 471 |  | 
 | 472 |     def __isub__(self, other): | 
 | 473 |         # Set difference, in-place | 
 | 474 |         for x in other.addresslist: | 
 | 475 |             if x in self.addresslist: | 
 | 476 |                 self.addresslist.remove(x) | 
 | 477 |         return self | 
 | 478 |  | 
 | 479 |     def __getitem__(self, index): | 
 | 480 |         # Make indexing, slices, and 'in' work | 
 | 481 |         return self.addresslist[index] |