blob: b61a2b2a1a57a2f5ec08888ee665a73fdead769d [file] [log] [blame]
"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27
# Names exported by a star-import of this module.
__all__ = [
    'Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
    'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
    'MozillaCookieJar',
]
Georg Brandle854e762006-05-08 17:48:01 +000031
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000032import re, urlparse, copy, time, urllib
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000033try:
34 import threading as _threading
35except ImportError:
36 import dummy_threading as _threading
37import httplib # only for the default HTTP port
38from calendar import timegm
39
debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() the first time it logs


def _debug(*args):
    """Pass *args* to the "cookielib" logger's debug() method, if enabled.

    Nothing happens unless the module-level ``debug`` flag is true.  The
    ``logging`` module is imported, and the logger looked up, only on the
    first call that actually logs; the logger is then cached in the
    module-level ``logger``.

    Returns whatever ``logger.debug`` returns, or None when disabled.
    """
    if not debug:
        return
    global logger
    if not logger:
        import logging
        logger = logging.getLogger("cookielib")
    return logger.debug(*args)
51
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000052
# The standard HTTP port, kept as a string.
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
56
def _warn_unhandled_exception():
    """Issue a warning carrying the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  Call this from inside
    such a handler so that anything caught there gets reported.
    """
    import warnings, traceback
    # format_exc() returns the same text print_exc() would have written.
    msg = traceback.format_exc()
    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000066
67
68# Date/time conversion
69# -----------------------------------------------------------------------------
70
EPOCH_YEAR = 1970


def _timegm(tt):
    """Convert a UTC time tuple to seconds since the epoch, or None.

    Only the first six items of *tt* (year, month, day, hour, minute,
    second) are range-checked.  None is returned when a field is outside
    the accepted range, when the year precedes EPOCH_YEAR, or when
    calendar.timegm() cannot represent the date.
    """
    year, month, mday, hour, minute, sec = tt[:6]
    in_range = ((year >= EPOCH_YEAR) and (1 <= month <= 12) and
                (1 <= mday <= 31) and (0 <= hour <= 24) and
                (0 <= minute <= 59) and (0 <= sec <= 61))
    if not in_range:
        return None
    try:
        return timegm(tt)
    except (ValueError, OverflowError):
        # e.g. a year larger than the datetime module supports
        return None
79
# Weekday abbreviations, indexed by the weekday field of a time tuple.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Month abbreviations; index 0 is January.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month abbreviations, for case-insensitive lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]
85
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, minute, sec)
102
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The comma after the weekday is part of the documented format.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)
118
119
# Timezone names that mean "zero offset"; only the keys are used.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Optional sign, one or two hour digits, optional ":" and two minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")


def offset_from_tz_string(tz):
    """Return the UTC offset, in seconds, named by timezone string *tz*.

    *tz* is either one of the names in UTC_ZONES (offset 0) or a numeric
    offset such as "+0100", "-08:00" or "5".  None is returned when the
    string is neither.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    sign, hours, minutes = m.groups()
    offset = 3600 * int(hours)
    if minutes:
        offset = offset + 60 * int(minutes)
    if sign == '-':
        offset = -offset
    return offset
136
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date fields to seconds since the epoch, or None.

    All arguments are strings as captured by the date regexps; hr, min,
    sec and tz may be None.  *mon* may be a month name or a month number.
    A year below 1000 is treated as abbreviated and moved to the century
    that puts it nearest the current year.  A missing tz means UTC.

    Returns None if the month, the year, the field ranges or the timezone
    are not acceptable.
    """
    from datetime import MAXYEAR

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower()) + 1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    if yr > MAXYEAR:
        # not representable; would otherwise blow up inside timegm()
        return None
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        cur_in_century = cur_yr % 100
        distance = cur_in_century - yr
        yr = yr + cur_yr - cur_in_century
        # more than half a century away: the neighbouring century is closer
        if distance > 50:
            yr = yr + 100
        elif distance < -50:
            yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec))
    if t is None:
        return None

    # adjust time using timezone string, to get absolute time since epoch
    if tz is None:
        tz = "UTC"
    offset = offset_from_tz_string(tz.upper())
    if offset is None:
        return None
    return t - offset
189
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)


def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # Shaped like a month abbreviation but not one (e.g. "Foo");
            # the loose parser would reject it as well.
            return None
        # int() for the seconds keeps the documented integer return value
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
267
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)


def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    A fractional part on the seconds is accepted and discarded.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()

    if sec is not None:
        # The regexp admits "SS.fff", which int() cannot convert; keep only
        # the whole seconds.
        sec = sec.split(".")[0]

    return _str2time(day, mon, yr, hr, min, sec, tz)
312
313
314# Header parsing
315# -----------------------------------------------------------------------------
316
def unmatched(match):
    """Return unmatched part of re.Match object.

    That is, the searched string with the span of the whole match cut out.
    """
    start, end = match.span(0)
    text = match.string
    return text[:start] + text[end:]
321
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
# Leading separators that carry no information and are skipped.
_HEADER_JUNK_RE = re.compile(r"^[=\s;]*")


def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    # A bare string would be iterated character by character; the tuple
    # covers both byte and unicode strings.
    assert not isinstance(header_values, (str, type(u"")))
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            # Every pattern below is anchored at the start of the text, so
            # dropping a match is the same as slicing from its end.
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = text[m.end():]
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = text[m.end():]
                    value = HEADER_ESCAPE_RE.sub(r"\1", m.group(1))
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = text[m.end():]
                        value = m.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = _HEADER_JUNK_RE.subn("", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
410
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")


def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                # anything other than plain word characters needs quoting
                if not re.search(r"^\w+$", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        if attr:
            headers.append("; ".join(attr))
    return ", ".join(headers)
436
def _strip_quotes(text):
    """Return *text* without one leading and one trailing double quote.

    Each end is handled independently, so an unbalanced quote is removed
    too.
    """
    if text.startswith('"'):
        text = text[1:]
    if text.endswith('"'):
        text = text[:-1]
    return text
443
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns one list of (name, value) pairs per non-empty header.  The
    value is None for an attribute given without "=".  Known attribute
    names (but not the leading cookie name) are lowercased, an "expires"
    value is converted with http2time(), and a ("version", "0") pair is
    appended when the header carried no version attribute.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            # the first pair is the cookie's own name=value; leave it alone
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    if v is not None:
                        v = _strip_quotes(v)
                    version_set = True
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    v = http2time(_strip_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
494
495
# Matches text ending in a dot followed by digits, taken to be an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$")


def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True
511
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not B.startswith("."):
        return False
    # "A has the form NB": B must be a suffix of A, not merely occur
    # somewhere inside it.  N is non-empty because A != B here.
    if not A.endswith(B):
        return False
    if not is_HDN(A):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
550
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.

    Anything that does not look like an IPv4 address qualifies.

    """
    if IPV4_RE.search(text):
        return False
    return True
560
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    The comparison is case-insensitive.  If either looks like an IP
    address they must be equal.  Otherwise a B with a leading dot matches
    any A that ends with it, and a B without one matches only an equal A.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: only equal IP addresses match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
580
# A trailing ":<digits>" port suffix on a host string.
cut_port_re = re.compile(r":\d+$")


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.

    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, 1)
    return host.lower()
597
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    The effective name is the request-host with ".local" appended when the
    request-host contains no dot and does not look like an IPv4 address.

    """
    erhn = req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
608
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, parameters, query and fragment of the request's
    full URL; the path part is passed through escape_path().  The result
    always starts with "/".
    """
    url = request.get_full_url()
    path, parameters, query, frag = urlparse.urlparse(url)[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/" + req_path
    return req_path
623
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in request.get_host().
    DEFAULT_HTTP_PORT is returned when there is no ":", and None (after a
    debug message) when the text after it is not a number.
    """
    host = request.get_host()
    i = host.find(':')
    if i < 0:
        return DEFAULT_HTTP_PORT
    port = host[i+1:]
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
637
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: "%" followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with upper-case hex."""
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    # characters in HTTP_PATH_SAFE (which includes "%") are left untouched
    path = urllib.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path
659
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    i = h.find(".")
    if i >= 0:
        # h has the form A.B, with A = h[:i] free of dots
        b = h[i+1:]
        if is_HDN(h) and ("." in b or b == "local"):
            return "." + b
    return h
694
def is_third_party(request):
    """Return True if the request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
710
711
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless None, domain
        is lowercased, and rest (the mapping of nonstandard
        cookie-attributes) is shallow-copied.

        Raises ValueError if port is None while port_specified is True.
        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the nonstandard cookie-attribute name is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the nonstandard cookie-attribute name, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the nonstandard cookie-attribute name to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before now.

        now defaults to the current time; a cookie whose expires is None
        never expires.
        """
        if now is None: now = time.time()
        if (self.expires is not None) and (self.expires <= now):
            return True
        return False

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":" + self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
808
809
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
841
842
843class DefaultCookiePolicy(CookiePolicy):
844 """Implements the standard rules for accepting and returning cookies."""
845
846 DomainStrictNoDots = 1
847 DomainStrictNonDomain = 2
848 DomainRFC2965Match = 4
849
850 DomainLiberal = 0
851 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
852
853 def __init__(self,
854 blocked_domains=None, allowed_domains=None,
855 netscape=True, rfc2965=False,
Neal Norwitz71dad722005-12-23 21:43:48 +0000856 rfc2109_as_netscape=None,
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000857 hide_cookie2=False,
858 strict_domain=False,
859 strict_rfc2965_unverifiable=True,
860 strict_ns_unverifiable=False,
861 strict_ns_domain=DomainLiberal,
862 strict_ns_set_initial_dollar=False,
863 strict_ns_set_path=False,
864 ):
865 """Constructor arguments should be passed as keyword arguments only."""
866 self.netscape = netscape
867 self.rfc2965 = rfc2965
Neal Norwitz71dad722005-12-23 21:43:48 +0000868 self.rfc2109_as_netscape = rfc2109_as_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000869 self.hide_cookie2 = hide_cookie2
870 self.strict_domain = strict_domain
871 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
872 self.strict_ns_unverifiable = strict_ns_unverifiable
873 self.strict_ns_domain = strict_ns_domain
874 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
875 self.strict_ns_set_path = strict_ns_set_path
876
877 if blocked_domains is not None:
878 self._blocked_domains = tuple(blocked_domains)
879 else:
880 self._blocked_domains = ()
881
882 if allowed_domains is not None:
883 allowed_domains = tuple(allowed_domains)
884 self._allowed_domains = allowed_domains
885
886 def blocked_domains(self):
887 """Return the sequence of blocked domains (as a tuple)."""
888 return self._blocked_domains
889 def set_blocked_domains(self, blocked_domains):
890 """Set the sequence of blocked domains."""
891 self._blocked_domains = tuple(blocked_domains)
892
893 def is_blocked(self, domain):
894 for blocked_domain in self._blocked_domains:
895 if user_domain_match(domain, blocked_domain):
896 return True
897 return False
898
899 def allowed_domains(self):
900 """Return None, or the sequence of allowed domains (as a tuple)."""
901 return self._allowed_domains
902 def set_allowed_domains(self, allowed_domains):
903 """Set the sequence of allowed domains, or None."""
904 if allowed_domains is not None:
905 allowed_domains = tuple(allowed_domains)
906 self._allowed_domains = allowed_domains
907
908 def is_not_allowed(self, domain):
909 if self._allowed_domains is None:
910 return False
911 for allowed_domain in self._allowed_domains:
912 if user_domain_match(domain, allowed_domain):
913 return False
914 return True
915
916 def set_ok(self, cookie, request):
917 """
918 If you override .set_ok(), be sure to call this method. If it returns
919 false, so should your subclass (assuming your subclass wants to be more
920 strict about which cookies to accept).
921
922 """
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000923 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000924
925 assert cookie.name is not None
926
927 for n in "version", "verifiability", "name", "path", "domain", "port":
928 fn_name = "set_ok_"+n
929 fn = getattr(self, fn_name)
930 if not fn(cookie, request):
931 return False
932
933 return True
934
def set_ok_version(self, cookie, request):
    """Reject cookies whose protocol version is missing or switched off."""
    version = cookie.version
    if version is None:
        # Version is always set to 0 by parse_ns_headers if it's a Netscape
        # cookie, so this must be an invalid RFC 2965 cookie.
        _debug(" Set-Cookie2 without version attribute (%s=%s)",
               cookie.name, cookie.value)
        return False
    if version > 0:
        if not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0:
        if not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
    return True
949
def set_ok_verifiability(self, cookie, request):
    """Reject third-party cookies on unverifiable requests when strict.

    Only applies when the request is unverifiable and is_third_party() says
    it is third-party; the relevant strictness flag depends on the cookie
    version.

    """
    if not (request.is_unverifiable() and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        _debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        _debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
961
def set_ok_name(self, cookie, request):
    """Reject V0 cookies named with a leading '$' when the policy says so."""
    # Try and stop servers setting V0 cookies designed to hack other
    # servers that know both V0 and V1 protocols.
    suspicious = (cookie.version == 0 and
                  self.strict_ns_set_initial_dollar and
                  cookie.name.startswith("$"))
    if not suspicious:
        return True
    _debug(" illegal name (starts with '$'): '%s'", cookie.name)
    return False
970
def set_ok_path(self, cookie, request):
    """Reject an explicit cookie path that is not a prefix of the request path.

    The prefix rule is always enforced for version > 0 cookies, and for
    version 0 cookies only when strict_ns_set_path is set.

    """
    if not cookie.path_specified:
        return True
    req_path = request_path(request)
    enforce = (cookie.version > 0 or
               (cookie.version == 0 and self.strict_ns_set_path))
    if enforce and not req_path.startswith(cookie.path):
        _debug(" path attribute %s is not a prefix of request "
               "path %s", cookie.path, req_path)
        return False
    return True
981
def set_ok_domain(self, cookie, request):
    """Return True if the cookie's domain is acceptable for this request.

    The user block-list and allow-list are consulted first.  The remaining
    checks only apply when the cookie carried an explicit domain attribute:

    - with strict_domain, refuse domains of the form ".<sld>.<cc>" where
      <sld> is one of a fixed list and <cc> has two letters (like .co.uk);
    - refuse domains without an embedded dot, other than ".local";
    - version 0: the effective request-host (optionally with a leading dot
      added) must end with the domain;
    - version > 0, or DomainRFC2965Match set in strict_ns_domain: the
      effective request-host must domain_match() the domain;
    - version > 0, or DomainStrictNoDots set in strict_ns_domain: the part
      of the request host in front of the domain must not contain a dot
      (unless the request host matches IPV4_RE).

    """
    if self.is_blocked(cookie.domain):
        _debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        _debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if cookie.domain_specified:
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain
        if self.strict_domain and (domain.count(".") >= 2):
            # XXX This should probably be compared with the Konqueror
            # (kcookiejar.cpp) and Mozilla implementations, but it's a
            # losing battle.
            i = domain.rfind(".")
            j = domain.rfind(".", 0, i)
            if j == 0:  # domain like .foo.bar
                tld = domain[i+1:]
                sld = domain[j+1:i]
                if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                   "gov", "mil", "int", "aero", "biz", "cat", "coop",
                   "info", "jobs", "mobi", "museum", "name", "pro",
                   "travel", "eu") and len(tld) == 2:
                    # domain like .co.uk
                    _debug(" country-code second level domain %s", domain)
                    return False
        if domain.startswith("."):
            undotted_domain = domain[1:]
        else:
            undotted_domain = domain
        embedded_dots = (undotted_domain.find(".") >= 0)
        if not embedded_dots and domain != ".local":
            _debug(" non-local domain %s contains no embedded dot",
                   domain)
            return False
        if cookie.version == 0:
            if (not erhn.endswith(domain) and
                (not erhn.startswith(".") and
                 not ("."+erhn).endswith(domain))):
                # (message previously read "does not end end with")
                _debug(" effective request-host %s (even with added "
                       "initial dot) does not end with %s",
                       erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainRFC2965Match)):
            if not domain_match(erhn, domain):
                _debug(" effective request-host %s does not domain-match "
                       "%s", erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainStrictNoDots)):
            # Whatever precedes the domain in the request host.
            host_prefix = req_host[:-len(domain)]
            if (host_prefix.find(".") >= 0 and
                not IPV4_RE.search(req_host)):
                _debug(" host prefix %s for domain %s contains a dot",
                       host_prefix, domain)
                return False
    return True
1040
def set_ok_port(self, cookie, request):
    """Check an explicit port list against the port of the request.

    Each listed port is validated as numeric in order; the scan stops at
    the first entry equal to the request port (defaulting to "80").

    """
    if not cookie.port_specified:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    else:
        req_port = str(req_port)

    matched = False
    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            _debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            matched = True
            break
    if not matched:
        _debug(" request port (%s) not found in %s",
               req_port, cookie.port)
        return False
    return True
1061
def return_ok(self, cookie, request):
    """
    If you override .return_ok(), be sure to call this method.  If it
    returns false, so should your subclass (assuming your subclass wants to
    be more strict about which cookies to return).

    Runs the return_ok_<check> methods in a fixed order and stops at the
    first one that refuses the cookie.

    """
    # Path has already been checked by .path_return_ok(), and domain
    # blocking done by .domain_return_ok().
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    checks = ("version", "verifiability", "secure", "expires", "port",
              "domain")
    for suffix in checks:
        check = getattr(self, "return_ok_" + suffix)
        if not check(cookie, request):
            return False
    return True
1079
def return_ok_version(self, cookie, request):
    """Refuse to return cookies whose protocol is switched off."""
    version = cookie.version
    if version > 0:
        if not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0:
        if not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
    return True
1088
def return_ok_verifiability(self, cookie, request):
    """Refuse third-party cookies on unverifiable requests when strict.

    Only applies when the request is unverifiable and is_third_party() says
    it is third-party; the relevant strictness flag depends on the cookie
    version.

    """
    if not (request.is_unverifiable() and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        _debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        _debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
1100
def return_ok_secure(self, cookie, request):
    """Refuse to return a secure cookie unless the request type is "https"."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    _debug(" secure cookie with non-secure request")
    return False
1106
def return_ok_expires(self, cookie, request):
    """Refuse to return a cookie that is expired as of self._now."""
    if not cookie.is_expired(self._now):
        return True
    _debug(" cookie expired")
    return False
1112
def return_ok_port(self, cookie, request):
    """Refuse a port-restricted cookie when the request port is not listed.

    A request without a port is treated as port "80".

    """
    if not cookie.port:
        return True
    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    if req_port not in cookie.port.split(","):
        _debug(" request port %s does not match cookie port %s",
               req_port, cookie.port)
        return False
    return True
1126
def return_ok_domain(self, cookie, request):
    """Return True if the cookie's domain permits returning it for request.

    Version > 0 cookies must domain_match() the effective request-host.
    Version 0 cookies use a suffix test; that test is performed against the
    cookie domain with a leading dot, so that a cookie stored for
    "example.com" is not returned to "fakeexample.com".

    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    # Compare on a label boundary: a bare suffix test against an undotted
    # domain would also accept hosts that merely end with the same letters.
    if domain and not domain.startswith("."):
        dotted_domain = "." + domain
    else:
        dotted_domain = domain

    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if (cookie.version == 0 and
        (self.strict_ns_domain & self.DomainStrictNonDomain) and
        not cookie.domain_specified and domain != erhn):
        _debug(" cookie with unspecified domain does not string-compare "
               "equal to request domain")
        return False

    if cookie.version > 0 and not domain_match(erhn, domain):
        _debug(" effective request-host name %s does not domain-match "
               "RFC 2965 cookie domain %s", erhn, domain)
        return False
    if cookie.version == 0 and not ("."+erhn).endswith(dotted_domain):
        _debug(" request-host %s does not match Netscape cookie domain "
               "%s", req_host, domain)
        return False
    return True
1148
def domain_return_ok(self, domain, request):
    """Cheap pre-filter: may cookies stored under domain go to request?

    Returns False when neither the request host nor the effective
    request-host ends with the (dot-prefixed) domain, or when the domain is
    on the user block-list / missing from the user allow-list.

    """
    # Liberal check of domain.  This is here as an optimization to avoid
    # having to load lots of MSIE cookie files unless necessary.
    req_host, erhn = eff_request_host(request)
    if not req_host.startswith("."):
        req_host = "."+req_host
    if not erhn.startswith("."):
        erhn = "."+erhn
    # Match on a label boundary: without the leading dot, a cookie stored
    # for "example.com" would also be offered to "fakeexample.com".
    if domain and not domain.startswith("."):
        dotted_domain = "." + domain
    else:
        dotted_domain = domain
    if not (req_host.endswith(dotted_domain) or
            erhn.endswith(dotted_domain)):
        #_debug(" request domain %s does not match cookie domain %s",
        #       req_host, domain)
        return False

    if self.is_blocked(domain):
        _debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        _debug(" domain %s is not in user allow-list", domain)
        return False

    return True
1170
def path_return_ok(self, path, request):
    """Return True if the request path path-matches the cookie path.

    The paths match when they are equal, or when path is a prefix of the
    request path that ends on a "/" boundary (path itself ends with "/",
    or the next character of the request path is "/").  A plain prefix
    test would let a cookie for "/foo" be returned for "/foobar".

    """
    _debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    pathlen = len(path)
    if req_path == path:
        return True
    if (req_path.startswith(path) and
        (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
        return True

    _debug(" %s does not path-match %s", req_path, path)
    return False
1178
1179
def vals_sorted_by_key(adict):
    """Return a list of adict's values, ordered by ascending key.

    Always returns a real list, independent of whether keys() and map()
    produce lists or lazy objects.

    """
    return [adict[key] for key in sorted(adict.keys())]
1184
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    A value counts as a nested mapping when it has an "items" attribute;
    such values are descended into instead of being yielded themselves.

    """
    for value in vals_sorted_by_key(mapping):
        try:
            value.items
        except AttributeError:
            is_nested = False
        else:
            is_nested = True
        if is_nested:
            for leaf in deepvalues(value):
                yield leaf
        else:
            yield value
1200
1201
# Sentinel used as second parameter to dict.get() method, to distinguish
# absent dict key from one with a None value.  The class object itself is
# the marker: callers in this file test for it with "is" / "is not".
class Absent: pass
1205
1206class CookieJar:
1207 """Collection of HTTP cookies.
1208
1209 You may not need to know about this class: try
1210 urllib2.build_opener(HTTPCookieProcessor).open(url).
1211
1212 """
1213
1214 non_word_re = re.compile(r"\W")
1215 quote_re = re.compile(r"([\"\\])")
1216 strict_domain_re = re.compile(r"\.?[^.]*")
1217 domain_re = re.compile(r"[^.]*")
1218 dots_re = re.compile(r"^\.+")
1219
1220 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1221
1222 def __init__(self, policy=None):
1223 if policy is None:
1224 policy = DefaultCookiePolicy()
1225 self._policy = policy
1226
1227 self._cookies_lock = _threading.RLock()
1228 self._cookies = {}
1229
1230 def set_policy(self, policy):
1231 self._policy = policy
1232
1233 def _cookies_for_domain(self, domain, request):
1234 cookies = []
1235 if not self._policy.domain_return_ok(domain, request):
1236 return []
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001237 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001238 cookies_by_path = self._cookies[domain]
1239 for path in cookies_by_path.keys():
1240 if not self._policy.path_return_ok(path, request):
1241 continue
1242 cookies_by_name = cookies_by_path[path]
1243 for cookie in cookies_by_name.values():
1244 if not self._policy.return_ok(cookie, request):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001245 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001246 continue
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001247 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001248 cookies.append(cookie)
1249 return cookies
1250
1251 def _cookies_for_request(self, request):
1252 """Return a list of cookies to be returned to server."""
1253 cookies = []
1254 for domain in self._cookies.keys():
1255 cookies.extend(self._cookies_for_domain(domain, request))
1256 return cookies
1257
1258 def _cookie_attrs(self, cookies):
1259 """Return a list of cookie-attributes to be returned to server.
1260
1261 like ['foo="bar"; $Path="/"', ...]
1262
1263 The $Version attribute is also added when appropriate (currently only
1264 once per request).
1265
1266 """
1267 # add cookies in order of most specific (ie. longest) path first
Brett Cannon52f03c52008-08-03 22:34:25 +00001268 cookies.sort(key=lambda arg: len(arg.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001269
1270 version_set = False
1271
1272 attrs = []
1273 for cookie in cookies:
1274 # set version of Cookie header
1275 # XXX
1276 # What should it be if multiple matching Set-Cookie headers have
1277 # different versions themselves?
1278 # Answer: there is no answer; was supposed to be settled by
1279 # RFC 2965 errata, but that may never appear...
1280 version = cookie.version
1281 if not version_set:
1282 version_set = True
1283 if version > 0:
1284 attrs.append("$Version=%s" % version)
1285
1286 # quote cookie value if necessary
1287 # (not for Netscape protocol, which already has any quotes
1288 # intact, due to the poorly-specified Netscape Cookie: syntax)
1289 if ((cookie.value is not None) and
1290 self.non_word_re.search(cookie.value) and version > 0):
1291 value = self.quote_re.sub(r"\\\1", cookie.value)
1292 else:
1293 value = cookie.value
1294
1295 # add cookie-attributes to be returned in Cookie header
1296 if cookie.value is None:
1297 attrs.append(cookie.name)
1298 else:
1299 attrs.append("%s=%s" % (cookie.name, value))
1300 if version > 0:
1301 if cookie.path_specified:
1302 attrs.append('$Path="%s"' % cookie.path)
1303 if cookie.domain.startswith("."):
1304 domain = cookie.domain
1305 if (not cookie.domain_initial_dot and
1306 domain.startswith(".")):
1307 domain = domain[1:]
1308 attrs.append('$Domain="%s"' % domain)
1309 if cookie.port is not None:
1310 p = "$Port"
1311 if cookie.port_specified:
1312 p = p + ('="%s"' % cookie.port)
1313 attrs.append(p)
1314
1315 return attrs
1316
1317 def add_cookie_header(self, request):
1318 """Add correct Cookie: header to request (urllib2.Request object).
1319
1320 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1321
1322 """
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001323 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001324 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001325 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001326
Tim Petersf733abb2007-01-30 03:03:46 +00001327 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001328
Tim Petersf733abb2007-01-30 03:03:46 +00001329 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001330
Tim Petersf733abb2007-01-30 03:03:46 +00001331 attrs = self._cookie_attrs(cookies)
1332 if attrs:
1333 if not request.has_header("Cookie"):
1334 request.add_unredirected_header(
1335 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001336
Tim Petersf733abb2007-01-30 03:03:46 +00001337 # if necessary, advertise that we know RFC 2965
1338 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1339 not request.has_header("Cookie2")):
1340 for cookie in cookies:
1341 if cookie.version != 1:
1342 request.add_unredirected_header("Cookie2", '$Version="1"')
1343 break
1344
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001345 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001346 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001347
1348 self.clear_expired_cookies()
1349
1350 def _normalized_cookie_tuples(self, attrs_set):
1351 """Return list of tuples containing normalised cookie information.
1352
1353 attrs_set is the list of lists of key,value pairs extracted from
1354 the Set-Cookie or Set-Cookie2 headers.
1355
1356 Tuples are name, value, standard, rest, where name and value are the
1357 cookie name and value, standard is a dictionary containing the standard
1358 cookie-attributes (discard, secure, version, expires or max-age,
1359 domain, path and port) and rest is a dictionary containing the rest of
1360 the cookie-attributes.
1361
1362 """
1363 cookie_tuples = []
1364
1365 boolean_attrs = "discard", "secure"
1366 value_attrs = ("version",
1367 "expires", "max-age",
1368 "domain", "path", "port",
1369 "comment", "commenturl")
1370
1371 for cookie_attrs in attrs_set:
1372 name, value = cookie_attrs[0]
1373
1374 # Build dictionary of standard cookie-attributes (standard) and
1375 # dictionary of other cookie-attributes (rest).
1376
1377 # Note: expiry time is normalised to seconds since epoch. V0
1378 # cookies should have the Expires cookie-attribute, and V1 cookies
1379 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1380 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1381 # accept either (but prefer Max-Age).
1382 max_age_set = False
1383
1384 bad_cookie = False
1385
1386 standard = {}
1387 rest = {}
1388 for k, v in cookie_attrs[1:]:
1389 lc = k.lower()
1390 # don't lose case distinction for unknown fields
1391 if lc in value_attrs or lc in boolean_attrs:
1392 k = lc
1393 if k in boolean_attrs and v is None:
1394 # boolean cookie-attribute is present, but has no value
1395 # (like "discard", rather than "port=80")
1396 v = True
1397 if k in standard:
1398 # only first value is significant
1399 continue
1400 if k == "domain":
1401 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001402 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001403 bad_cookie = True
1404 break
1405 # RFC 2965 section 3.3.3
1406 v = v.lower()
1407 if k == "expires":
1408 if max_age_set:
1409 # Prefer max-age to expires (like Mozilla)
1410 continue
1411 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001412 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001413 "attribute: treating as session cookie")
1414 continue
1415 if k == "max-age":
1416 max_age_set = True
1417 try:
1418 v = int(v)
1419 except ValueError:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001420 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001421 "max-age attribute")
1422 bad_cookie = True
1423 break
1424 # convert RFC 2965 Max-Age to seconds since epoch
1425 # XXX Strictly you're supposed to follow RFC 2616
1426 # age-calculation rules. Remember that zero Max-Age is a
1427 # is a request to discard (old and new) cookie, though.
1428 k = "expires"
1429 v = self._now + v
1430 if (k in value_attrs) or (k in boolean_attrs):
1431 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001432 k not in ("port", "comment", "commenturl")):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001433 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001434 bad_cookie = True
1435 break
1436 standard[k] = v
1437 else:
1438 rest[k] = v
1439
1440 if bad_cookie:
1441 continue
1442
1443 cookie_tuples.append((name, value, standard, rest))
1444
1445 return cookie_tuples
1446
1447 def _cookie_from_cookie_tuple(self, tup, request):
1448 # standard is dict of standard cookie-attributes, rest is dict of the
1449 # rest of them
1450 name, value, standard, rest = tup
1451
1452 domain = standard.get("domain", Absent)
1453 path = standard.get("path", Absent)
1454 port = standard.get("port", Absent)
1455 expires = standard.get("expires", Absent)
1456
1457 # set the easy defaults
1458 version = standard.get("version", None)
Georg Brandl5d0ca2c2010-05-22 11:29:19 +00001459 if version is not None:
1460 try:
1461 version = int(version)
1462 except ValueError:
1463 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001464 secure = standard.get("secure", False)
1465 # (discard is also set if expires is Absent)
1466 discard = standard.get("discard", False)
1467 comment = standard.get("comment", None)
1468 comment_url = standard.get("commenturl", None)
1469
1470 # set default path
1471 if path is not Absent and path != "":
1472 path_specified = True
1473 path = escape_path(path)
1474 else:
1475 path_specified = False
1476 path = request_path(request)
1477 i = path.rfind("/")
1478 if i != -1:
1479 if version == 0:
1480 # Netscape spec parts company from reality here
1481 path = path[:i]
1482 else:
1483 path = path[:i+1]
1484 if len(path) == 0: path = "/"
1485
1486 # set default domain
1487 domain_specified = domain is not Absent
1488 # but first we have to remember whether it starts with a dot
1489 domain_initial_dot = False
1490 if domain_specified:
1491 domain_initial_dot = bool(domain.startswith("."))
1492 if domain is Absent:
1493 req_host, erhn = eff_request_host(request)
1494 domain = erhn
1495 elif not domain.startswith("."):
1496 domain = "."+domain
1497
1498 # set default port
1499 port_specified = False
1500 if port is not Absent:
1501 if port is None:
1502 # Port attr present, but has no value: default to request port.
1503 # Cookie should then only be sent back on that port.
1504 port = request_port(request)
1505 else:
1506 port_specified = True
1507 port = re.sub(r"\s+", "", port)
1508 else:
1509 # No port attr present. Cookie can be sent back on any port.
1510 port = None
1511
1512 # set default expires and discard
1513 if expires is Absent:
1514 expires = None
1515 discard = True
1516 elif expires <= self._now:
1517 # Expiry date in past is request to delete cookie. This can't be
1518 # in DefaultCookiePolicy, because can't delete cookies there.
1519 try:
1520 self.clear(domain, path, name)
1521 except KeyError:
1522 pass
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001523 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1524 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001525 return None
1526
1527 return Cookie(version,
1528 name, value,
1529 port, port_specified,
1530 domain, domain_specified, domain_initial_dot,
1531 path, path_specified,
1532 secure,
1533 expires,
1534 discard,
1535 comment,
1536 comment_url,
1537 rest)
1538
1539 def _cookies_from_attrs_set(self, attrs_set, request):
1540 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1541
1542 cookies = []
1543 for tup in cookie_tuples:
1544 cookie = self._cookie_from_cookie_tuple(tup, request)
1545 if cookie: cookies.append(cookie)
1546 return cookies
1547
Neal Norwitz71dad722005-12-23 21:43:48 +00001548 def _process_rfc2109_cookies(self, cookies):
1549 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1550 if rfc2109_as_ns is None:
1551 rfc2109_as_ns = not self._policy.rfc2965
1552 for cookie in cookies:
1553 if cookie.version == 1:
1554 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001555 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001556 # treat 2109 cookies as Netscape cookies rather than
1557 # as RFC2965 cookies
1558 cookie.version = 0
1559
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001560 def make_cookies(self, response, request):
1561 """Return sequence of Cookie objects extracted from response object."""
1562 # get cookie-attributes for RFC 2965 and Netscape protocols
1563 headers = response.info()
1564 rfc2965_hdrs = headers.getheaders("Set-Cookie2")
1565 ns_hdrs = headers.getheaders("Set-Cookie")
1566
1567 rfc2965 = self._policy.rfc2965
1568 netscape = self._policy.netscape
1569
1570 if ((not rfc2965_hdrs and not ns_hdrs) or
1571 (not ns_hdrs and not rfc2965) or
1572 (not rfc2965_hdrs and not netscape) or
1573 (not netscape and not rfc2965)):
1574 return [] # no relevant cookie headers: quick exit
1575
1576 try:
1577 cookies = self._cookies_from_attrs_set(
1578 split_header_words(rfc2965_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001579 except Exception:
1580 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001581 cookies = []
1582
1583 if ns_hdrs and netscape:
1584 try:
Neal Norwitz71dad722005-12-23 21:43:48 +00001585 # RFC 2109 and Netscape cookies
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001586 ns_cookies = self._cookies_from_attrs_set(
1587 parse_ns_headers(ns_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001588 except Exception:
1589 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001590 ns_cookies = []
Neal Norwitz71dad722005-12-23 21:43:48 +00001591 self._process_rfc2109_cookies(ns_cookies)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001592
1593 # Look for Netscape cookies (from Set-Cookie headers) that match
1594 # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1595 # For each match, keep the RFC 2965 cookie and ignore the Netscape
1596 # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1597 # bundled in with the Netscape cookies for this purpose, which is
1598 # reasonable behaviour.
1599 if rfc2965:
1600 lookup = {}
1601 for cookie in cookies:
1602 lookup[(cookie.domain, cookie.path, cookie.name)] = None
1603
1604 def no_matching_rfc2965(ns_cookie, lookup=lookup):
1605 key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1606 return key not in lookup
1607 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1608
1609 if ns_cookies:
1610 cookies.extend(ns_cookies)
1611
1612 return cookies
1613
1614 def set_cookie_if_ok(self, cookie, request):
1615 """Set a cookie if policy says it's OK to do so."""
1616 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001617 try:
1618 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001619
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001620 if self._policy.set_ok(cookie, request):
1621 self.set_cookie(cookie)
Tim Petersf733abb2007-01-30 03:03:46 +00001622
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001623
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001624 finally:
1625 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001626
1627 def set_cookie(self, cookie):
1628 """Set a cookie, without checking whether or not it should be set."""
1629 c = self._cookies
1630 self._cookies_lock.acquire()
1631 try:
1632 if cookie.domain not in c: c[cookie.domain] = {}
1633 c2 = c[cookie.domain]
1634 if cookie.path not in c2: c2[cookie.path] = {}
1635 c3 = c2[cookie.path]
1636 c3[cookie.name] = cookie
1637 finally:
1638 self._cookies_lock.release()
1639
1640 def extract_cookies(self, response, request):
1641 """Extract cookies from response, where allowable given the request."""
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001642 _debug("extract_cookies: %s", response.info())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001643 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001644 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001645 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001646
Tim Petersf733abb2007-01-30 03:03:46 +00001647 for cookie in self.make_cookies(response, request):
1648 if self._policy.set_ok(cookie, request):
1649 _debug(" setting cookie: %s", cookie)
1650 self.set_cookie(cookie)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001651 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001652 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001653
1654 def clear(self, domain=None, path=None, name=None):
1655 """Clear some cookies.
1656
1657 Invoking this method without arguments will clear all cookies. If
1658 given a single argument, only cookies belonging to that domain will be
1659 removed. If given two arguments, cookies belonging to the specified
1660 path within that domain are removed. If given three arguments, then
1661 the cookie with the specified name, path and domain is removed.
1662
1663 Raises KeyError if no matching cookie exists.
1664
1665 """
1666 if name is not None:
1667 if (domain is None) or (path is None):
1668 raise ValueError(
1669 "domain and path must be given to remove a cookie by name")
1670 del self._cookies[domain][path][name]
1671 elif path is not None:
1672 if domain is None:
1673 raise ValueError(
1674 "domain must be given to remove cookies by path")
1675 del self._cookies[domain][path]
1676 elif domain is not None:
1677 del self._cookies[domain]
1678 else:
1679 self._cookies = {}
1680
1681 def clear_session_cookies(self):
1682 """Discard all session cookies.
1683
1684 Note that the .save() method won't save session cookies anyway, unless
1685 you ask otherwise by passing a true ignore_discard argument.
1686
1687 """
1688 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001689 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001690 for cookie in self:
1691 if cookie.discard:
1692 self.clear(cookie.domain, cookie.path, cookie.name)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001693 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001694 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001695
1696 def clear_expired_cookies(self):
1697 """Discard all expired cookies.
1698
1699 You probably don't need to call this method: expired cookies are never
1700 sent back to the server (provided you're using DefaultCookiePolicy),
1701 this method is called by CookieJar itself every so often, and the
1702 .save() method won't save expired cookies anyway (unless you ask
1703 otherwise by passing a true ignore_expires argument).
1704
1705 """
1706 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001707 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001708 now = time.time()
1709 for cookie in self:
1710 if cookie.is_expired(now):
1711 self.clear(cookie.domain, cookie.path, cookie.name)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001712 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001713 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001714
1715 def __iter__(self):
1716 return deepvalues(self._cookies)
1717
def __len__(self):
    """Return number of contained cookies."""
    # Counting is done by walking the jar's own iterator once.
    return sum(1 for _ in self)
1723
def __repr__(self):
    """Return "<CLASS[...]>" listing the repr() of each cookie."""
    cookie_reprs = [repr(ck) for ck in self]
    joined = ", ".join(cookie_reprs)
    return "<%s[%s]>" % (self.__class__, joined)
1728
def __str__(self):
    """Return "<CLASS[...]>" listing the str() of each cookie."""
    cookie_strs = [str(ck) for ck in self]
    joined = ", ".join(cookie_strs)
    return "<%s[%s]>" % (self.__class__, joined)
1733
1734
# Subclasses IOError to stay backwards-compatible with Python 2.4.0.
class LoadError(IOError):
    """Error signalling that cookies could not be loaded from a file."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001737
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This class itself does not implement a file format: .save() raises
    NotImplementedError, and .load() hands the opened file to
    self._really_load(), which is not defined here and is expected to be
    supplied by a subclass.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename: default file name used by .load() and .revert(); must be
         string-like (support concatenation with "") or None
        delayload: stored on the instance as a bool
        policy: passed straight through to CookieJar.__init__

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                # Cheap duck-typing probe: anything that can be concatenated
                # with a string is accepted as a file name.
                filename+""
            except Exception:
                # Only ordinary errors mean "not string-like";
                # KeyboardInterrupt/SystemExit must not be turned into
                # ValueError.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented in this class; always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        If filename is None, self.filename is used instead; ValueError is
        raised when both are None.  The file is opened, passed to
        self._really_load() together with the ignore_* flags, and always
        closed afterwards.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        If filename is None, self.filename is used instead; ValueError is
        raised (before anything is modified) when both are None.

        Raises whatever .load() raises (e.g. LoadError or IOError) if
        reversion is not successful; the object's state will not be altered
        if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            # Keep a copy of the current cookies so they can be put back if
            # loading fails part-way through.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except BaseException:
                # Roll back on *any* failure, not just LoadError/IOError, so
                # the "state will not be altered" guarantee really holds;
                # the original exception is then re-raised unchanged.
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001797
1798from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
1799from _MozillaCookieJar import MozillaCookieJar