| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1 | """HTTP cookie handling for web clients. | 
|  | 2 |  | 
|  | 3 | This module has (now fairly distant) origins in Gisle Aas' Perl module | 
|  | 4 | HTTP::Cookies, from the libwww-perl library. | 
|  | 5 |  | 
|  | 6 | Docstrings, comments and debug strings in this code refer to the | 
|  | 7 | attributes of the HTTP cookie system as cookie-attributes, to distinguish | 
|  | 8 | them clearly from Python attributes. | 
|  | 9 |  | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 10 | Class diagram (note that BSDDBCookieJar and the MSIE* classes are not | 
|  | 11 | distributed with the Python standard library, but are available from | 
|  | 12 | http://wwwsearch.sf.net/): | 
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 13 |  | 
|  | 14 | CookieJar____ | 
|  | 15 | /     \      \ | 
|  | 16 | FileCookieJar      \      \ | 
|  | 17 | /    |   \         \      \ | 
|  | 18 | MozillaCookieJar | LWPCookieJar \      \ | 
|  | 19 | |               |      \ | 
|  | 20 | |   ---MSIEBase |       \ | 
|  | 21 | |  /      |     |        \ | 
|  | 22 | | /   MSIEDBCookieJar BSDDBCookieJar | 
|  | 23 | |/ | 
|  | 24 | MSIECookieJar | 
|  | 25 |  | 
|  | 26 | """ | 
|  | 27 |  | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 28 | __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', | 
|  | 29 | 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] | 
|  | 30 |  | 
|  | 31 | import re, urlparse, copy, time, urllib | 
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 32 | try: | 
|  | 33 | import threading as _threading | 
|  | 34 | except ImportError: | 
|  | 35 | import dummy_threading as _threading | 
|  | 36 | import httplib  # only for the default HTTP port | 
|  | 37 | from calendar import timegm | 
|  | 38 |  | 
# Flip to True to send this module's debug messages to the logging module.
debug = False
# Created lazily by _debug() the first time a message is really emitted.
logger = None

def _debug(*args):
    """Pass *args straight to logger.debug() when debugging is enabled.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  The "cookielib" logger is looked up on first use, so the logging
    module is only imported once debugging has been switched on.
    """
    global logger
    if debug:
        if not logger:
            import logging
            logger = logging.getLogger("cookielib")
        return logger.debug(*args)
|  | 50 |  | 
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 51 |  | 
# The default HTTP port, held as a string (request_port() returns it in
# place of the textual port it would otherwise cut out of the host).
DEFAULT_HTTP_PORT = "%d" % httplib.HTTP_PORT
# Shared message text for the case where no filename is available.
MISSING_FILENAME_TEXT = (
    "a filename was not supplied "
    "(nor was the CookieJar instance initialised with one)")
|  | 55 |  | 
def _warn_unhandled_exception():
    """Issue a warning carrying the traceback of the active exception.

    There are a few catch-all ``except:`` statements in this module, for
    catching input that's bad in unexpected ways.  They call this function
    so that anything swallowed there is still reported.
    """
    import traceback
    import warnings
    # format_exc() yields the same text print_exc() would write to a file.
    trace_text = traceback.format_exc()
    # stacklevel=2 attributes the warning to the caller's line, not this one.
    warnings.warn("cookielib bug!\n%s" % trace_text, stacklevel=2)
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 65 |  | 
|  | 66 |  | 
|  | 67 | # Date/time conversion | 
|  | 68 | # ----------------------------------------------------------------------------- | 
|  | 69 |  | 
EPOCH_YEAR = 1970

def _timegm(tt):
    """Convert a UTC time tuple to seconds since the epoch, or return None.

    Only the first six items of *tt* (year, month, day, hour, minute,
    second) are inspected.  None is returned when the year precedes
    EPOCH_YEAR or any field falls outside the range accepted below;
    otherwise the tuple is handed to calendar.timegm().
    """
    year, month, mday, hour, minute, sec = tt[:6]
    if year < EPOCH_YEAR:
        return None
    if not (1 <= month <= 12 and 1 <= mday <= 31):
        return None
    # hour 24 and seconds up to 61 are deliberately let through
    if not (0 <= hour <= 24 and 0 <= minute <= 59 and 0 <= sec <= 61):
        return None
    return timegm(tt)
|  | 78 |  | 
# Weekday abbreviations; time2netscape() indexes this list with tm_wday.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Month abbreviations, January first.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS, used for case-insensitive month lookups.
MONTHS_LOWER = [month.lower() for month in MONTHS]
|  | 84 |  | 
def time2isoz(t=None):
    """Format t, a time in seconds since the epoch, as an ISO-style string.

    When t is omitted (or None) the current time is used.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and always expresses
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # year, month, day, hour, minute, second -- in exactly that order
    fields = tuple(time.gmtime(t)[:6])
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
|  | 101 |  | 
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    The weekday and month abbreviations come from DAYS and MONTHS, and the
    time is always expressed in GMT.

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The comma after the weekday is part of the documented format; it used
    # to be left out of the format string, so the output did not match the
    # docstring above.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon - 1], year, hour, minute, sec)
|  | 117 |  | 
|  | 118 |  | 
# Timezone names treated as zero offset; only the keys are consulted.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Optional sign, one or two hour digits, optional colon, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")

def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by the string tz.

    Names listed in UTC_ZONES give 0.  Numeric zones such as "-0800",
    "+01:00" or "5" are converted to a signed number of seconds.  Anything
    else yields None.  The lookup is case-sensitive.
    """
    if tz in UTC_ZONES:
        return 0
    match = TIMEZONE_RE.search(tz)
    if not match:
        return None
    sign, hours, minutes = match.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * int(minutes)
    if sign == "-":
        seconds = -seconds
    return seconds
|  | 135 |  | 
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Turn the pieces of a parsed date into seconds since the epoch.

    The arguments are the strings captured by the date regexps in this
    module (hr, min, sec and tz may be None when absent):

    mon -- month abbreviation (any case) or month number
    yr  -- year; values below 1000 are mapped onto the century that puts
           them closest to the current year
    sec -- seconds; a fractional part is accepted and truncated
    tz  -- timezone string understood by offset_from_tz_string(); None
           means UTC

    Returns None if the month or timezone is not recognised, or if
    _timegm() rejects the resulting date.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower()) + 1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None:
        hr = 0
    if min is None:
        min = 0
    if sec is None:
        sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE captures fractional seconds such as "29.5", which a bare
    # int() rejects with ValueError; go through float() and truncate.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0:
                yr = yr + 100
            else:
                yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
|  | 188 |  | 
# Exactly "Wdy, DD Mon YYYY HH:MM:SS GMT"; groups: day, month, year, h, m, s.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (abbreviated or spelled out) with optional comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # The pattern only checks the *shape* of the month ("Foo" gets
            # through); an unknown name is an unrecognised date, not an error.
            return None
        # seconds are converted with int() so that this path returns an
        # integer, as documented and as the loose path below does
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
|  | 266 |  | 
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    Like http2time, but for dates written in ISO 8601 style:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Leading whitespace is skipped.  None is returned when the text does not
    match ISO_DATE_RE; otherwise the result is whatever _str2time() makes of
    the captured fields.

    """
    match = ISO_DATE_RE.search(text.lstrip())
    if match is None:
        return None  # bad format

    # XXX the last group is an extra bit of the timezone that is ignored
    #   here: is this the right thing to do?
    year, month, mday, hour, minute, second, zone, _ = match.groups()
    return _str2time(mday, month, year, hour, minute, second, zone)
|  | 311 |  | 
|  | 312 |  | 
|  | 313 | # Header parsing | 
|  | 314 | # ----------------------------------------------------------------------------- | 
|  | 315 |  | 
def unmatched(match):
    """Return match.string with the span of the whole match cut out."""
    begin, finish = match.span(0)
    subject = match.string
    return "".join((subject[:begin], subject[finish:]))
|  | 320 |  | 
# A token: optional leading whitespace, then anything up to "=", ";", ","
# or whitespace.
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
# ="..." where the quoted text may contain backslash-escaped characters.
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
# =value where the value is unquoted (and possibly empty).
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
# A backslash escape inside a quoted value.
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    header_values is a sequence of strings (passing a single string trips
    the assertion below).  "," separates headers, ";" and plain whitespace
    separate the items of one header, and "=" joins an attribute to its
    value, which may be a quoted string with backslash escapes.  Several
    header values are handled as though they had been joined with ",".

    That makes the function suitable for header fields that follow this
    syntax (BNF as from the HTTP/1.1 specification, but we relax the
    requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Every header becomes one list of (key, value) tuples; a bare token that
    is not part of a parameter gets the value None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    Examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                name = token.group(1)
                text = unmatched(token)
                quoted = HEADER_QUOTED_VALUE_RE.search(text)
                if quoted:
                    # quoted value: drop the quotes, undo backslash escapes
                    text = unmatched(quoted)
                    value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
                else:
                    plain = HEADER_VALUE_RE.search(text)
                    if plain:
                        # unquoted value
                        text = unmatched(plain)
                        value = plain.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    result.append(pairs)
                pairs = []
                continue

            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            result.append(pairs)
    return result
|  | 409 |  | 
# Characters that must be backslash-escaped inside a quoted value.
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    lists is a list of lists of (key, value) pairs; the result is a single
    header value.  Pairs of one inner list are joined with "; ", the inner
    lists themselves with ", ".  A value of None produces a bare key, and any
    value that is not made up purely of word characters is quoted.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        rendered = []
        for key, value in pairs:
            if value is None:
                rendered.append(key)
                continue
            if not re.search(r"^\w+$", value):
                # escape " and \ before wrapping the value in quotes
                value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
            rendered.append("%s=%s" % (key, value))
        if rendered:
            headers.append("; ".join(rendered))
    return ", ".join(headers)
|  | 435 |  | 
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    ns_headers is a sequence of header value strings.  The result holds one
    list of (key, value) pairs per non-empty header:

    - the first pair is the cookie's own name and value, left untouched;
    - names of known cookie-attributes are lower-cased;
    - an attribute given without "=" gets the value None;
    - the value of "expires" is passed through http2time(), giving seconds
      since the epoch, or None if the date is invalid or missing;
    - ("version", "0") is appended unless the header carried a version
      attribute of its own (in any letter case).

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if lc == "version":
                    # This is an RFC 2109 cookie.  Attribute names are
                    # case-insensitive, so "Version=1" must count as well;
                    # otherwise a second, bogus version "0" gets appended.
                    version_set = True
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    # (v is None for a bare "expires" with no "=": there is
                    # nothing to strip or parse in that case)
                    if v.startswith('"'):
                        v = v[1:]
                    if v.endswith('"'):
                        v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
|  | 487 |  | 
|  | 488 |  | 
# Matches text ending in a dot followed by digits, i.e. an IPv4-style tail.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    looks_numeric = IPV4_RE.search(text) is not None
    if looks_numeric or text == "":
        return False
    # neither a leading nor a trailing dot is allowed
    return not (text.startswith(".") or text.endswith("."))
|  | 504 |  | 
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not B.startswith("."):
        # B does not have the form .B'
        return False
    if not A.endswith(B):
        # A does not have the form NB.  B must be a *suffix* of A: merely
        # occurring somewhere inside it is not enough, or else
        # "x.y.com.evil.org" would domain-match ".y.com".  Since A != B here,
        # a suffix match also guarantees that N is non-empty.
        return False
    return is_HDN(A) and is_HDN(B[1:])
|  | 543 |  | 
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  The only thing rejected is text that
    IPV4_RE recognises.

    """
    return IPV4_RE.search(text) is None
|  | 553 |  | 
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses; both are compared
    case-insensitively.  If B starts with a dot, A matches when it ends
    with B.  In every other case (including when either side fails
    liberal_is_HDN) the two must be equal.

    """
    host = A.lower()
    pattern = B.lower()
    if not liberal_is_HDN(host) or not liberal_is_HDN(pattern):
        # only equal IP addresses match
        return host == pattern
    if pattern.startswith("."):
        return host.endswith(pattern)
    return host == pattern
|  | 573 |  | 
# A trailing ":port" on a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.  Any port is
    removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", netloc, 1).lower()
|  | 590 |  | 
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot (and is not matched by IPV4_RE); otherwise the two are
    identical.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
|  | 601 |  | 
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, parameters, query and fragment of the request's
    full URL: the path (with any ";parameters" re-attached) is run through
    escape_path(), and the result always starts with "/".
    """
    url_parts = urlparse.urlparse(request.get_full_url())
    path, parameters, query, frag = url_parts[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    escaped = escape_path(path)
    req_path = urlparse.urlunparse(("", "", escaped, "", query, frag))
    if req_path.startswith("/"):
        return req_path
    # fix bad RFC 2396 absoluteURI
    return "/" + req_path
|  | 616 |  | 
def request_port(request):
    """Return the port of the request's host, as a string.

    Whatever follows the first ":" in request.get_host() is returned when
    it parses as an integer; if it does not, the problem is logged through
    _debug() and None is returned.  A host without ":" yields
    DEFAULT_HTTP_PORT.
    """
    host = request.get_host()
    colon = host.find(":")
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon + 1:]
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
|  | 630 |  | 
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: percent sign followed by two hexadecimal digits.
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with upper-case hex."""
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
|  | 652 |  | 
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # H does not have the form A.B
        return h
    # b is everything after the first label (the "A" of the definition)
    b = h[first_dot + 1:]
    if ("." in b or b == "local") and is_HDN(h):
        return "." + b
    return h
|  | 687 |  | 
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
|  | 703 |  | 
|  | 704 |  | 
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless they are None,
        domain is lower-cased, and rest (the mapping of nonstandard
        cookie-attributes) is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # Copy so that later changes to the caller's mapping (or to ours via
        # set_nonstandard_attr) do not leak across.
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if a nonstandard cookie-attribute *name* is stored."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return nonstandard cookie-attribute *name*, or *default* if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Store *value* as nonstandard cookie-attribute *name*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= *now*.

        *now* defaults to time.time().  A cookie whose expires is None never
        counts as expired.

        """
        if now is None: now = time.time()
        if (self.expires is not None) and (self.expires <= now):
            return True
        return False

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        # Use the runtime class name so that subclasses get an accurate repr
        # instead of always claiming to be a plain Cookie.
        return "%s(%s)" % (self.__class__.__name__, ", ".join(args))
|  | 801 |  | 
|  | 802 |  | 
class CookiePolicy:
    """Interface deciding which cookies are accepted from and returned to
    the server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        The base implementation accepts every domain.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        The base implementation accepts every path.

        """
        return True
|  | 834 |  | 
|  | 835 |  | 
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Bit flags for the strict_ns_domain constructor argument.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        # None (as opposed to an empty sequence) means "no allow-list".
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain user-domain-matches any blocked domain."""
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches none of it."""
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    @staticmethod
    def _dotted(domain):
        """Return domain with a leading dot added if it is non-empty and has none.

        Suffix comparisons against a dotted host must use the dotted form, so
        that "example.com" does not match the host "evilexample.com".

        """
        if domain and not domain.startswith("."):
            return "." + domain
        return domain

    @staticmethod
    def _path_match(req_path, path):
        """Return True if cookie path *path* path-matches *req_path*.

        A bare prefix test is not enough: "/foo" must match "/foo" and
        "/foo/bar" but not "/foobar".

        """
        if req_path == path:
            return True
        pathlen = len(path)
        return (req_path.startswith(path) and
                (path.endswith("/") or req_path[pathlen:pathlen+1] == "/"))

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        # Run each set_ok_<aspect> check in turn; the first failure rejects.
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies with no version or whose protocol is switched off."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug("   Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug("   Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Reject third-party cookies on unverifiable requests, if so configured."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug("   third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug("   third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Reject '$'-prefixed Netscape cookie names when configured to."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug("   illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Reject a specified path that does not path-match the request path.

        Applied to RFC 2965 cookies always, and to Netscape cookies only when
        strict_ns_set_path is set.

        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not self._path_match(req_path, cookie.path)):
                _debug("   path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Apply the block/allow lists and the domain-acceptance rules."""
        if self.is_blocked(cookie.domain):
            _debug("   domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug("   domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug("   country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug("   non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug("   effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug("   effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug("   host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """Require the request port to be in the cookie's numeric port list."""
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug("   bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                # Loop finished without a match.
                _debug("   request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse to return cookies whose protocol is switched off."""
        if cookie.version > 0 and not self.rfc2965:
            _debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug("   Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Refuse third-party cookies on unverifiable requests, if so configured."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug("   third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug("   third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Return secure cookies only on requests whose type is "https"."""
        if cookie.secure and request.get_type() != "https":
            _debug("   secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse cookies that are expired as of self._now.

        self._now is not assigned anywhere in this class;
        CookieJar.add_cookie_header sets it on the policy before the cookies
        are checked.

        """
        if cookie.is_expired(self._now):
            _debug("   cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Require the request port to appear in the cookie's port list."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                # Same normalisation as set_ok_port, so the string
                # comparison below cannot fail on a non-string port.
                req_port = str(req_port)
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                _debug("   request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check the cookie's domain against the effective request-host."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain
        dotdomain = self._dotted(domain)

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug("   cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug("   effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        # Compare dotted forms so that "example.com" is not treated as a
        # suffix match for a host such as "evilexample.com".
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            _debug("   request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Return False if no cookie stored under *domain* may be returned."""
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        # Dotted comparison: see _dotted() for why a bare suffix test is
        # unsafe.
        dotdomain = self._dotted(domain)
        if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
            #_debug("   request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug("   domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug("   domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return False if cookie path *path* does not path-match the request."""
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not self._path_match(req_path, path):
            _debug("  %s does not path-match %s", req_path, path)
            return False
        return True
|  | 1171 |  | 
|  | 1172 |  | 
def vals_sorted_by_key(adict):
    """Return a list of the values of *adict*, ordered by sorted key.

    A real list is built (rather than returning the result of map()) so that
    callers get a reusable, indexable sequence whether or not map() is lazy.

    """
    return [adict.get(key) for key in sorted(adict.keys())]
|  | 1176 |  | 
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an `items` attribute is treated as a nested mapping
    and descended into; every other value is yielded as a leaf.

    """
    for value in vals_sorted_by_key(mapping):
        try:
            value.items
        except AttributeError:
            # Not mapping-like: this is a leaf.
            yield value
            continue
        for leaf in deepvalues(value):
            yield leaf
|  | 1192 |  | 
|  | 1193 |  | 
# Sentinel passed as the second argument to dict.get(), so that a missing
# key can be told apart from a key whose value is None.
class Absent:
    pass
|  | 1197 |  | 
|  | 1198 | class CookieJar: | 
|  | 1199 | """Collection of HTTP cookies. | 
|  | 1200 |  | 
|  | 1201 | You may not need to know about this class: try | 
|  | 1202 | urllib2.build_opener(HTTPCookieProcessor).open(url). | 
|  | 1203 |  | 
|  | 1204 | """ | 
|  | 1205 |  | 
|  | 1206 | non_word_re = re.compile(r"\W") | 
|  | 1207 | quote_re = re.compile(r"([\"\\])") | 
|  | 1208 | strict_domain_re = re.compile(r"\.?[^.]*") | 
|  | 1209 | domain_re = re.compile(r"[^.]*") | 
|  | 1210 | dots_re = re.compile(r"^\.+") | 
|  | 1211 |  | 
|  | 1212 | magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" | 
|  | 1213 |  | 
def __init__(self, policy=None):
    """Create an empty jar governed by *policy*.

    When *policy* is None a fresh DefaultCookiePolicy() is used.

    """
    self._policy = DefaultCookiePolicy() if policy is None else policy

    # Re-entrant lock taken around access to the cookie store.
    self._cookies_lock = _threading.RLock()
    self._cookies = {}
|  | 1221 |  | 
|  | 1222 | def set_policy(self, policy): | 
|  | 1223 | self._policy = policy | 
|  | 1224 |  | 
def _cookies_for_domain(self, domain, request):
    """Return the cookies stored under *domain* that the policy allows to be
    returned for *request*.

    The policy's domain check is applied first, then its path check for each
    stored path, then return_ok() for each individual cookie.

    """
    if not self._policy.domain_return_ok(domain, request):
        return []
    _debug("Checking %s for cookies to return", domain)
    matches = []
    by_path = self._cookies[domain]
    for path in by_path.keys():
        if not self._policy.path_return_ok(path, request):
            continue
        for cookie in by_path[path].values():
            if self._policy.return_ok(cookie, request):
                _debug("   it's a match")
                matches.append(cookie)
            else:
                _debug("   not returning cookie")
    return matches
|  | 1242 |  | 
|  | 1243 | def _cookies_for_request(self, request): | 
|  | 1244 | """Return a list of cookies to be returned to server.""" | 
|  | 1245 | cookies = [] | 
|  | 1246 | for domain in self._cookies.keys(): | 
|  | 1247 | cookies.extend(self._cookies_for_domain(domain, request)) | 
|  | 1248 | return cookies | 
|  | 1249 |  | 
|  | 1250 | def _cookie_attrs(self, cookies): | 
|  | 1251 | """Return a list of cookie-attributes to be returned to server. | 
|  | 1252 |  | 
|  | 1253 | like ['foo="bar"; $Path="/"', ...] | 
|  | 1254 |  | 
|  | 1255 | The $Version attribute is also added when appropriate (currently only | 
|  | 1256 | once per request). | 
|  | 1257 |  | 
|  | 1258 | """ | 
|  | 1259 | # add cookies in order of most specific (ie. longest) path first | 
|  | 1260 | def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) | 
|  | 1261 | cookies.sort(decreasing_size) | 
|  | 1262 |  | 
|  | 1263 | version_set = False | 
|  | 1264 |  | 
|  | 1265 | attrs = [] | 
|  | 1266 | for cookie in cookies: | 
|  | 1267 | # set version of Cookie header | 
|  | 1268 | # XXX | 
|  | 1269 | # What should it be if multiple matching Set-Cookie headers have | 
|  | 1270 | #  different versions themselves? | 
|  | 1271 | # Answer: there is no answer; was supposed to be settled by | 
|  | 1272 | #  RFC 2965 errata, but that may never appear... | 
|  | 1273 | version = cookie.version | 
|  | 1274 | if not version_set: | 
|  | 1275 | version_set = True | 
|  | 1276 | if version > 0: | 
|  | 1277 | attrs.append("$Version=%s" % version) | 
|  | 1278 |  | 
|  | 1279 | # quote cookie value if necessary | 
|  | 1280 | # (not for Netscape protocol, which already has any quotes | 
|  | 1281 | #  intact, due to the poorly-specified Netscape Cookie: syntax) | 
|  | 1282 | if ((cookie.value is not None) and | 
|  | 1283 | self.non_word_re.search(cookie.value) and version > 0): | 
|  | 1284 | value = self.quote_re.sub(r"\\\1", cookie.value) | 
|  | 1285 | else: | 
|  | 1286 | value = cookie.value | 
|  | 1287 |  | 
|  | 1288 | # add cookie-attributes to be returned in Cookie header | 
|  | 1289 | if cookie.value is None: | 
|  | 1290 | attrs.append(cookie.name) | 
|  | 1291 | else: | 
|  | 1292 | attrs.append("%s=%s" % (cookie.name, value)) | 
|  | 1293 | if version > 0: | 
|  | 1294 | if cookie.path_specified: | 
|  | 1295 | attrs.append('$Path="%s"' % cookie.path) | 
|  | 1296 | if cookie.domain.startswith("."): | 
|  | 1297 | domain = cookie.domain | 
|  | 1298 | if (not cookie.domain_initial_dot and | 
|  | 1299 | domain.startswith(".")): | 
|  | 1300 | domain = domain[1:] | 
|  | 1301 | attrs.append('$Domain="%s"' % domain) | 
|  | 1302 | if cookie.port is not None: | 
|  | 1303 | p = "$Port" | 
|  | 1304 | if cookie.port_specified: | 
|  | 1305 | p = p + ('="%s"' % cookie.port) | 
|  | 1306 | attrs.append(p) | 
|  | 1307 |  | 
|  | 1308 | return attrs | 
|  | 1309 |  | 
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.
    Headers already present on the request are never overwritten.

    """
    _debug("add_cookie_header")
    self._cookies_lock.acquire()
    try:
        # Jar and policy share one timestamp for the whole operation.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        matching = self._cookies_for_request(request)
        header_parts = self._cookie_attrs(matching)
        if header_parts and not request.has_header("Cookie"):
            request.add_unredirected_header(
                "Cookie", "; ".join(header_parts))

        # if necessary, advertise that we know RFC 2965
        if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
            not request.has_header("Cookie2")):
            for ck in matching:
                if ck.version == 1:
                    continue
                # one non-version-1 cookie is enough to add the header
                request.add_unredirected_header("Cookie2", '$Version="1"')
                break
    finally:
        self._cookies_lock.release()

    # Done after releasing the lock; clear_expired_cookies takes it itself.
    self.clear_expired_cookies()
|  | 1342 |  | 
def _normalized_cookie_tuples(self, attrs_set):
    """Return list of tuples containing normalised cookie information.

    attrs_set is the list of lists of key,value pairs extracted from
    the Set-Cookie or Set-Cookie2 headers.

    Tuples are name, value, standard, rest, where name and value are the
    cookie name and value, standard is a dictionary containing the standard
    cookie-attributes (discard, secure, version, expires or max-age,
    domain, path and port) and rest is a dictionary containing the rest of
    the cookie-attributes.

    A cookie with a malformed standard cookie-attribute (missing domain
    value, missing or non-numeric max-age, missing value for an attribute
    that requires one) is dropped; the other cookies are still returned.

    """
    cookie_tuples = []

    boolean_attrs = "discard", "secure"
    value_attrs = ("version",
                   "expires", "max-age",
                   "domain", "path", "port",
                   "comment", "commenturl")

    for cookie_attrs in attrs_set:
        name, value = cookie_attrs[0]

        # Build dictionary of standard cookie-attributes (standard) and
        # dictionary of other cookie-attributes (rest).

        # Note: expiry time is normalised to seconds since epoch.  V0
        # cookies should have the Expires cookie-attribute, and V1 cookies
        # should have Max-Age, but since V1 includes RFC 2109 cookies (and
        # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
        # accept either (but prefer Max-Age).
        max_age_set = False

        bad_cookie = False

        standard = {}
        rest = {}
        for k, v in cookie_attrs[1:]:
            lc = k.lower()
            # don't lose case distinction for unknown fields
            if lc in value_attrs or lc in boolean_attrs:
                k = lc
            if k in boolean_attrs and v is None:
                # boolean cookie-attribute is present, but has no value
                # (like "discard", rather than "port=80")
                v = True
            if k in standard:
                # only first value is significant
                continue
            if k == "domain":
                if v is None:
                    _debug("   missing value for domain attribute")
                    bad_cookie = True
                    break
                # RFC 2965 section 3.3.3
                v = v.lower()
            if k == "expires":
                if max_age_set:
                    # Prefer max-age to expires (like Mozilla)
                    continue
                if v is None:
                    _debug("   missing or invalid value for expires "
                           "attribute: treating as session cookie")
                    continue
            if k == "max-age":
                max_age_set = True
                try:
                    v = int(v)
                except (ValueError, TypeError):
                    # TypeError covers a valueless attribute (v is None);
                    # without it the error would escape this method
                    # instead of rejecting just this cookie.
                    _debug("   missing or invalid (non-numeric) value for "
                           "max-age attribute")
                    bad_cookie = True
                    break
                # convert RFC 2965 Max-Age to seconds since epoch
                # XXX Strictly you're supposed to follow RFC 2616
                #   age-calculation rules.  Remember that zero Max-Age is
                #   a request to discard (old and new) cookie, though.
                k = "expires"
                v = self._now + v
            if (k in value_attrs) or (k in boolean_attrs):
                if (v is None and
                    k not in ("port", "comment", "commenturl")):
                    _debug("   missing value for %s attribute" % k)
                    bad_cookie = True
                    break
                standard[k] = v
            else:
                rest[k] = v

        if bad_cookie:
            continue

        cookie_tuples.append((name, value, standard, rest))

    return cookie_tuples
|  | 1439 |  | 
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised (name, value, standard, rest) tuple.

    standard is the dict of standard cookie-attributes and rest is the dict
    of the remaining ones.  Path, domain and port that the server did not
    supply are defaulted from request.

    Returns the new Cookie, or None when no cookie should be created:
    either the version cookie-attribute is not an integer, or the expiry
    time is not in the future (in which case any stored cookie with the
    same domain, path and name is removed).

    """
    name, value, standard, rest = tup

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # set the easy defaults
    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            # A malformed Version only invalidates this one cookie; letting
            # the error propagate would abort the caller's loop over the
            # remaining cookies.
            _debug("   invalid (non-integer) value for version attribute")
            return None
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    if path is not Absent and path != "":
        path_specified = True
        path = escape_path(path)
    else:
        path_specified = False
        path = request_path(request)
        i = path.rfind("/")
        if i != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:i]
            else:
                path = path[:i+1]
        if len(path) == 0: path = "/"

    # set default domain
    domain_specified = domain is not Absent
    # but first we have to remember whether it starts with a dot
    domain_initial_dot = False
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
    if domain is Absent:
        req_host, erhn = eff_request_host(request)
        domain = erhn
    elif not domain.startswith("."):
        domain = "."+domain

    # set default port
    port_specified = False
    if port is not Absent:
        if port is None:
            # Port attr present, but has no value: default to request port.
            # Cookie should then only be sent back on that port.
            port = request_port(request)
        else:
            port_specified = True
            port = re.sub(r"\s+", "", port)
    else:
        # No port attr present.  Cookie can be sent back on any port.
        port = None

    # set default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            pass
        _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
               domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
|  | 1527 |  | 
def _cookies_from_attrs_set(self, attrs_set, request):
    """Return the Cookie objects built from the parsed header data attrs_set.

    Tuples for which _cookie_from_cookie_tuple returns a false value
    are left out of the result.

    """
    made = []
    for tup in self._normalized_cookie_tuples(attrs_set):
        candidate = self._cookie_from_cookie_tuple(tup, request)
        if candidate:
            made.append(candidate)
    return made
|  | 1536 |  | 
def _process_rfc2109_cookies(self, cookies):
    """Flag version-1 cookies as RFC 2109 and optionally downgrade them.

    Each cookie with version 1 gets rfc2109 = True.  If the policy's
    rfc2109_as_netscape is true (or is None/missing while policy.rfc2965
    is false), the cookie's version is also reset to 0.  Cookies are
    modified in place.

    """
    policy = self._policy
    downgrade = getattr(policy, 'rfc2109_as_netscape', None)
    if downgrade is None:
        downgrade = not policy.rfc2965
    for ck in cookies:
        if ck.version != 1:
            continue
        ck.rfc2109 = True
        if downgrade:
            # treat 2109 cookies as Netscape cookies rather than
            # as RFC2965 cookies
            ck.version = 0
|  | 1548 |  | 
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object."""
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # Carry on only if some header is present whose protocol the policy
    # has switched on; otherwise there is nothing relevant: quick exit.
    if not ((rfc2965_hdrs and rfc2965) or (ns_hdrs and netscape)):
        return []

    try:
        parsed_2965 = split_header_words(rfc2965_hdrs)
        cookies = self._cookies_from_attrs_set(parsed_2965, request)
    except Exception:
        _warn_unhandled_exception()
        cookies = []

    if ns_hdrs and netscape:
        try:
            # RFC 2109 and Netscape cookies
            parsed_ns = parse_ns_headers(ns_hdrs)
            ns_cookies = self._cookies_from_attrs_set(parsed_ns, request)
        except Exception:
            _warn_unhandled_exception()
            ns_cookies = []
        self._process_rfc2109_cookies(ns_cookies)

        # Look for Netscape cookies (from Set-Cookie headers) that match
        # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
        # For each match, keep the RFC 2965 cookie and ignore the Netscape
        # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
        # bundled in with the Netscape cookies for this purpose, which is
        # reasonable behaviour.
        if rfc2965:
            taken = set((ck.domain, ck.path, ck.name) for ck in cookies)
            ns_cookies = [ck for ck in ns_cookies
                          if (ck.domain, ck.path, ck.name) not in taken]

        if ns_cookies:
            cookies.extend(ns_cookies)

    return cookies
|  | 1602 |  | 
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so."""
    self._cookies_lock.acquire()
    try:
        # Refresh the timestamp shared by jar and policy before asking.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        allowed = self._policy.set_ok(cookie, request)
        if allowed:
            self.set_cookie(cookie)
    finally:
        self._cookies_lock.release()
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1615 |  | 
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set."""
    by_domain = self._cookies
    self._cookies_lock.acquire()
    try:
        # Storage is nested dicts: domain -> path -> name -> cookie.
        by_path = by_domain.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
    finally:
        self._cookies_lock.release()
|  | 1628 |  | 
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request."""
    _debug("extract_cookies: %s", response.info())
    self._cookies_lock.acquire()
    try:
        # Refresh the timestamp shared by jar and policy.
        now = int(time.time())
        self._policy._now = now
        self._now = now

        for ck in self.make_cookies(response, request):
            if not self._policy.set_ok(ck, request):
                continue
            _debug(" setting cookie: %s", ck)
            self.set_cookie(ck)
    finally:
        self._cookies_lock.release()
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1642 |  | 
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    With no arguments, every cookie is removed.  With domain only, all
    cookies of that domain are removed.  With domain and path, the cookies
    stored under that path of the domain are removed.  With all three
    arguments, the single cookie with that name, path and domain is
    removed.

    Raises KeyError if no matching cookie exists, and ValueError if a
    more specific argument is given without the broader ones it needs.

    """
    # Most specific request first; each case returns once handled.
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is not None:
        del self._cookies[domain]
        return

    self._cookies = {}
|  | 1669 |  | 
def clear_session_cookies(self):
    """Discard all session cookies.

    Session cookies are those whose discard attribute is true.  Note that
    the .save() method won't save session cookies anyway, unless you ask
    otherwise by passing a true ignore_discard argument.

    """
    self._cookies_lock.acquire()
    try:
        for ck in self:
            if not ck.discard:
                continue
            self.clear(ck.domain, ck.path, ck.name)
    finally:
        self._cookies_lock.release()
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1684 |  | 
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    self._cookies_lock.acquire()
    try:
        # One timestamp for the whole sweep.
        cutoff = time.time()
        for ck in self:
            if not ck.is_expired(cutoff):
                continue
            self.clear(ck.domain, ck.path, ck.name)
    finally:
        self._cookies_lock.release()
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1703 |  | 
def __iter__(self):
    """Return what deepvalues() yields for the nested cookie store."""
    store = self._cookies
    return deepvalues(store)
|  | 1706 |  | 
def __len__(self):
    """Return number of contained cookies."""
    # Count by iterating over the jar.
    return sum(1 for _ in self)
|  | 1712 |  | 
def __repr__(self):
    """Return '<class[...]>' listing the repr() of every contained cookie."""
    listing = ", ".join([repr(ck) for ck in self])
    return "<%s[%s]>" % (self.__class__, listing)
|  | 1717 |  | 
def __str__(self):
    """Return '<class[...]>' listing the str() of every contained cookie."""
    listing = ", ".join([str(ck) for ck in self])
    return "<%s[%s]>" % (self.__class__, listing)
|  | 1722 |  | 
|  | 1723 |  | 
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Error named alongside IOError as what FileCookieJar.revert() may raise."""
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1726 |  | 
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    save() is abstract here (it raises NotImplementedError), and load()
    delegates the actual parsing to self._really_load(), which is not
    defined in this class.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename, if given, must be string-like (ValueError otherwise) and
        becomes the default for load() and revert().  delayload is stored
        as a bool; policy is passed on to CookieJar.__init__.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                # Duck-type check: anything that concatenates with a
                # string is accepted as a filename.
                filename+""
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must not
                # be turned into a ValueError.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when
        neither is set.  The file is always closed, even if loading fails.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            # Keep a copy of the current cookies so a failed load can be
            # rolled back.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
| Martin v. Löwis | 2a6ba90 | 2004-05-31 18:22:40 +0000 | [diff] [blame] | 1786 |  | 
|  | 1787 | from _LWPCookieJar import LWPCookieJar, lwp_cookie_str | 
|  | 1788 | from _MozillaCookieJar import MozillaCookieJar |