blob: 5b250905ec5375a28838e80dabee5f7836eaf995 [file] [log] [blame]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001"""HTTP cookie handling for web clients.
2
3This module has (now fairly distant) origins in Gisle Aas' Perl module
4HTTP::Cookies, from the libwww-perl library.
5
6Docstrings, comments and debug strings in this code refer to the
7attributes of the HTTP cookie system as cookie-attributes, to distinguish
8them clearly from Python attributes.
9
Georg Brandle854e762006-05-08 17:48:01 +000010Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
11distributed with the Python standard library, but are available from
12http://wwwsearch.sf.net/):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000013
14 CookieJar____
15 / \ \
16 FileCookieJar \ \
17 / | \ \ \
18 MozillaCookieJar | LWPCookieJar \ \
19 | | \
20 | ---MSIEBase | \
21 | / | | \
22 | / MSIEDBCookieJar BSDDBCookieJar
23 |/
24 MSIECookieJar
25
26"""
27
Georg Brandle854e762006-05-08 17:48:01 +000028__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
Brett Cannon88f801d2008-08-18 00:46:22 +000029 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
30 'MozillaCookieJar']
Georg Brandle854e762006-05-08 17:48:01 +000031
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000032import re, urlparse, copy, time, urllib
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000033try:
34 import threading as _threading
35except ImportError:
36 import dummy_threading as _threading
37import httplib # only for the default HTTP port
38from calendar import timegm
39
Neal Norwitzb678ce52006-05-18 06:51:46 +000040debug = False # set to True to enable debugging via the logging module
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000041logger = None
42
43def _debug(*args):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000044 if not debug:
45 return
Neal Norwitzb678ce52006-05-18 06:51:46 +000046 global logger
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000047 if not logger:
48 import logging
49 logger = logging.getLogger("cookielib")
50 return logger.debug(*args)
51
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000052
53DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
54MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
55 "instance initialised with one)")
56
Georg Brandle854e762006-05-08 17:48:01 +000057def _warn_unhandled_exception():
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000058 # There are a few catch-all except: statements in this module, for
Georg Brandle854e762006-05-08 17:48:01 +000059 # catching input that's bad in unexpected ways. Warn if any
60 # exceptions are caught there.
Andrew M. Kuchlingae40c2f2004-07-10 18:32:12 +000061 import warnings, traceback, StringIO
62 f = StringIO.StringIO()
63 traceback.print_exc(None, f)
64 msg = f.getvalue()
65 warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000066
67
68# Date/time conversion
69# -----------------------------------------------------------------------------
70
71EPOCH_YEAR = 1970
72def _timegm(tt):
73 year, month, mday, hour, min, sec = tt[:6]
74 if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
75 (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
76 return timegm(tt)
77 else:
78 return None
79
80DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
81MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
82 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
83MONTHS_LOWER = []
84for month in MONTHS: MONTHS_LOWER.append(month.lower())
85
86def time2isoz(t=None):
87 """Return a string representing time in seconds since epoch, t.
88
89 If the function is called without an argument, it will use the current
90 time.
91
92 The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
93 representing Universal Time (UTC, aka GMT). An example of this format is:
94
95 1994-11-24 08:49:37Z
96
97 """
98 if t is None: t = time.time()
99 year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
100 return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
101 year, mon, mday, hour, min, sec)
102
103def time2netscape(t=None):
104 """Return a string representing time in seconds since epoch, t.
105
106 If the function is called without an argument, it will use the current
107 time.
108
109 The format of the returned string is like this:
110
111 Wed, DD-Mon-YYYY HH:MM:SS GMT
112
113 """
114 if t is None: t = time.time()
115 year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
116 return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
117 DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
118
119
120UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
121
122TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
123def offset_from_tz_string(tz):
124 offset = None
125 if tz in UTC_ZONES:
126 offset = 0
127 else:
128 m = TIMEZONE_RE.search(tz)
129 if m:
130 offset = 3600 * int(m.group(2))
131 if m.group(3):
132 offset = offset + 60 * int(m.group(3))
133 if m.group(1) == '-':
134 offset = -offset
135 return offset
136
137def _str2time(day, mon, yr, hr, min, sec, tz):
138 # translate month name to number
139 # month numbers start with 1 (January)
140 try:
141 mon = MONTHS_LOWER.index(mon.lower())+1
142 except ValueError:
143 # maybe it's already a number
144 try:
145 imon = int(mon)
146 except ValueError:
147 return None
148 if 1 <= imon <= 12:
149 mon = imon
150 else:
151 return None
152
153 # make sure clock elements are defined
154 if hr is None: hr = 0
155 if min is None: min = 0
156 if sec is None: sec = 0
157
158 yr = int(yr)
159 day = int(day)
160 hr = int(hr)
161 min = int(min)
162 sec = int(sec)
163
164 if yr < 1000:
165 # find "obvious" year
166 cur_yr = time.localtime(time.time())[0]
167 m = cur_yr % 100
168 tmp = yr
169 yr = yr + cur_yr - m
170 m = m - tmp
171 if abs(m) > 50:
172 if m > 0: yr = yr + 100
173 else: yr = yr - 100
174
175 # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
176 t = _timegm((yr, mon, day, hr, min, sec, tz))
177
178 if t is not None:
179 # adjust time using timezone string, to get absolute time since epoch
180 if tz is None:
181 tz = "UTC"
182 tz = tz.upper()
183 offset = offset_from_tz_string(tz)
184 if offset is None:
185 return None
186 t = t - offset
187
188 return t
189
190STRICT_DATE_RE = re.compile(
191 r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
192 "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
193WEEKDAY_RE = re.compile(
194 r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
195LOOSE_HTTP_DATE_RE = re.compile(
196 r"""^
197 (\d\d?) # day
198 (?:\s+|[-\/])
199 (\w+) # month
200 (?:\s+|[-\/])
201 (\d+) # year
202 (?:
203 (?:\s+|:) # separator before clock
204 (\d\d?):(\d\d) # hour:min
205 (?::(\d\d))? # optional seconds
206 )? # optional clock
207 \s*
208 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
209 \s*
210 (?:\(\w+\))? # ASCII representation of timezone in parens.
211 \s*$""", re.X)
212def http2time(text):
213 """Returns time in seconds since epoch of time represented by a string.
214
215 Return value is an integer.
216
217 None is returned if the format of str is unrecognized, the time is outside
218 the representable range, or the timezone string is not recognized. If the
219 string contains no timezone, UTC is assumed.
220
221 The timezone in the string may be numerical (like "-0800" or "+0100") or a
222 string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
223 timezone strings equivalent to UTC (zero offset) are known to the function.
224
225 The function loosely parses the following formats:
226
227 Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
228 Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
229 Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
230 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
231 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
232 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
233
234 The parser ignores leading and trailing whitespace. The time may be
235 absent.
236
237 If the year is given with only 2 digits, the function will select the
238 century that makes the year closest to the current date.
239
240 """
241 # fast exit for strictly conforming string
242 m = STRICT_DATE_RE.search(text)
243 if m:
244 g = m.groups()
245 mon = MONTHS_LOWER.index(g[1].lower()) + 1
246 tt = (int(g[2]), mon, int(g[0]),
247 int(g[3]), int(g[4]), float(g[5]))
248 return _timegm(tt)
249
250 # No, we need some messy parsing...
251
252 # clean up
253 text = text.lstrip()
254 text = WEEKDAY_RE.sub("", text, 1) # Useless weekday
255
256 # tz is time zone specifier string
257 day, mon, yr, hr, min, sec, tz = [None]*7
258
259 # loose regexp parse
260 m = LOOSE_HTTP_DATE_RE.search(text)
261 if m is not None:
262 day, mon, yr, hr, min, sec, tz = m.groups()
263 else:
264 return None # bad format
265
266 return _str2time(day, mon, yr, hr, min, sec, tz)
267
268ISO_DATE_RE = re.compile(
269 """^
270 (\d{4}) # year
271 [-\/]?
272 (\d\d?) # numerical month
273 [-\/]?
274 (\d\d?) # day
275 (?:
276 (?:\s+|[-:Tt]) # separator before clock
277 (\d\d?):?(\d\d) # hour:min
278 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
279 )? # optional clock
280 \s*
281 ([-+]?\d\d?:?(:?\d\d)?
282 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
283 \s*$""", re.X)
284def iso2time(text):
285 """
286 As for http2time, but parses the ISO 8601 formats:
287
288 1994-02-03 14:15:29 -0100 -- ISO 8601 format
289 1994-02-03 14:15:29 -- zone is optional
290 1994-02-03 -- only date
291 1994-02-03T14:15:29 -- Use T as separator
292 19940203T141529Z -- ISO 8601 compact format
293 19940203 -- only date
294
295 """
296 # clean up
297 text = text.lstrip()
298
299 # tz is time zone specifier string
300 day, mon, yr, hr, min, sec, tz = [None]*7
301
302 # loose regexp parse
303 m = ISO_DATE_RE.search(text)
304 if m is not None:
305 # XXX there's an extra bit of the timezone I'm ignoring here: is
306 # this the right thing to do?
307 yr, mon, day, hr, min, sec, tz, _ = m.groups()
308 else:
309 return None # bad format
310
311 return _str2time(day, mon, yr, hr, min, sec, tz)
312
313
314# Header parsing
315# -----------------------------------------------------------------------------
316
317def unmatched(match):
318 """Return unmatched part of re.Match object."""
319 start, end = match.span(0)
320 return match.string[:start]+match.string[end:]
321
322HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
323HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
324HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
325HEADER_ESCAPE_RE = re.compile(r"\\(.)")
326def split_header_words(header_values):
327 r"""Parse header values into a list of lists containing key,value pairs.
328
329 The function knows how to deal with ",", ";" and "=" as well as quoted
330 values after "=". A list of space separated tokens are parsed as if they
331 were separated by ";".
332
333 If the header_values passed as argument contains multiple values, then they
334 are treated as if they were a single value separated by comma ",".
335
336 This means that this function is useful for parsing header fields that
337 follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
338 the requirement for tokens).
339
340 headers = #header
341 header = (token | parameter) *( [";"] (token | parameter))
342
343 token = 1*<any CHAR except CTLs or separators>
344 separators = "(" | ")" | "<" | ">" | "@"
345 | "," | ";" | ":" | "\" | <">
346 | "/" | "[" | "]" | "?" | "="
347 | "{" | "}" | SP | HT
348
349 quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
350 qdtext = <any TEXT except <">>
351 quoted-pair = "\" CHAR
352
353 parameter = attribute "=" value
354 attribute = token
355 value = token | quoted-string
356
357 Each header is represented by a list of key/value pairs. The value for a
358 simple token (not part of a parameter) is None. Syntactically incorrect
359 headers will not necessarily be parsed as you would want.
360
361 This is easier to describe with some examples:
362
363 >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
364 [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
365 >>> split_header_words(['text/html; charset="iso-8859-1"'])
366 [[('text/html', None), ('charset', 'iso-8859-1')]]
367 >>> split_header_words([r'Basic realm="\"foo\bar\""'])
368 [[('Basic', None), ('realm', '"foobar"')]]
369
370 """
Raymond Hettingerf7153662005-02-07 14:16:21 +0000371 assert not isinstance(header_values, basestring)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000372 result = []
373 for text in header_values:
374 orig_text = text
375 pairs = []
376 while text:
377 m = HEADER_TOKEN_RE.search(text)
378 if m:
379 text = unmatched(m)
380 name = m.group(1)
381 m = HEADER_QUOTED_VALUE_RE.search(text)
382 if m: # quoted value
383 text = unmatched(m)
384 value = m.group(1)
385 value = HEADER_ESCAPE_RE.sub(r"\1", value)
386 else:
387 m = HEADER_VALUE_RE.search(text)
388 if m: # unquoted value
389 text = unmatched(m)
390 value = m.group(1)
391 value = value.rstrip()
392 else:
393 # no value, a lone token
394 value = None
395 pairs.append((name, value))
396 elif text.lstrip().startswith(","):
397 # concatenated headers, as per RFC 2616 section 4.2
398 text = text.lstrip()[1:]
399 if pairs: result.append(pairs)
400 pairs = []
401 else:
402 # skip junk
403 non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
404 assert nr_junk_chars > 0, (
405 "split_header_words bug: '%s', '%s', %s" %
406 (orig_text, text, pairs))
407 text = non_junk
408 if pairs: result.append(pairs)
409 return result
410
411HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
412def join_header_words(lists):
413 """Do the inverse (almost) of the conversion done by split_header_words.
414
415 Takes a list of lists of (key, value) pairs and produces a single header
416 value. Attribute values are quoted if needed.
417
418 >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
419 'text/plain; charset="iso-8859/1"'
420 >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
421 'text/plain, charset="iso-8859/1"'
422
423 """
424 headers = []
425 for pairs in lists:
426 attr = []
427 for k, v in pairs:
428 if v is not None:
429 if not re.search(r"^\w+$", v):
430 v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
431 v = '"%s"' % v
432 k = "%s=%s" % (k, v)
433 attr.append(k)
434 if attr: headers.append("; ".join(attr))
435 return ", ".join(headers)
436
Georg Brandla19baf52010-05-22 11:31:16 +0000437def _strip_quotes(text):
Georg Brandl5d0ca2c2010-05-22 11:29:19 +0000438 if text.startswith('"'):
439 text = text[1:]
440 if text.endswith('"'):
441 text = text[:-1]
442 return text
443
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000444def parse_ns_headers(ns_headers):
445 """Ad-hoc parser for Netscape protocol cookie-attributes.
446
447 The old Netscape cookie format for Set-Cookie can for instance contain
448 an unquoted "," in the expires field, so we have to use this ad-hoc
449 parser instead of split_header_words.
450
451 XXX This may not make the best possible effort to parse all the crap
452 that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
453 parser is probably better, so could do worse than following that if
454 this ever gives any trouble.
455
456 Currently, this is also used for parsing RFC 2109 cookies.
457
458 """
459 known_attrs = ("expires", "domain", "path", "secure",
460 # RFC 2109 attrs (may turn up in Netscape cookies, too)
Georg Brandl5d0ca2c2010-05-22 11:29:19 +0000461 "version", "port", "max-age")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000462
463 result = []
464 for ns_header in ns_headers:
465 pairs = []
466 version_set = False
Martin v. Löwis4ea3ead2005-03-03 10:48:12 +0000467 for ii, param in enumerate(re.split(r";\s*", ns_header)):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000468 param = param.rstrip()
469 if param == "": continue
470 if "=" not in param:
Martin v. Löwisc5574e82005-03-03 10:57:37 +0000471 k, v = param, None
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000472 else:
473 k, v = re.split(r"\s*=\s*", param, 1)
474 k = k.lstrip()
Martin v. Löwis4ea3ead2005-03-03 10:48:12 +0000475 if ii != 0:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000476 lc = k.lower()
477 if lc in known_attrs:
478 k = lc
479 if k == "version":
Neal Norwitz71dad722005-12-23 21:43:48 +0000480 # This is an RFC 2109 cookie.
Georg Brandla19baf52010-05-22 11:31:16 +0000481 v = _strip_quotes(v)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000482 version_set = True
483 if k == "expires":
484 # convert expires date to seconds since epoch
Georg Brandla19baf52010-05-22 11:31:16 +0000485 v = http2time(_strip_quotes(v)) # None if invalid
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000486 pairs.append((k, v))
487
488 if pairs:
489 if not version_set:
490 pairs.append(("version", "0"))
491 result.append(pairs)
492
493 return result
494
495
496IPV4_RE = re.compile(r"\.\d+$")
497def is_HDN(text):
498 """Return True if text is a host domain name."""
499 # XXX
500 # This may well be wrong. Which RFC is HDN defined in, if any (for
501 # the purposes of RFC 2965)?
502 # For the current implementation, what about IPv6? Remember to look
503 # at other uses of IPV4_RE also, if change this.
504 if IPV4_RE.search(text):
505 return False
506 if text == "":
507 return False
508 if text[0] == "." or text[-1] == ".":
509 return False
510 return True
511
512def domain_match(A, B):
513 """Return True if domain A domain-matches domain B, according to RFC 2965.
514
515 A and B may be host domain names or IP addresses.
516
517 RFC 2965, section 1:
518
519 Host names can be specified either as an IP address or a HDN string.
520 Sometimes we compare one host name with another. (Such comparisons SHALL
521 be case-insensitive.) Host A's name domain-matches host B's if
522
523 * their host name strings string-compare equal; or
524
525 * A is a HDN string and has the form NB, where N is a non-empty
526 name string, B has the form .B', and B' is a HDN string. (So,
527 x.y.com domain-matches .Y.com but not Y.com.)
528
529 Note that domain-match is not a commutative operation: a.b.c.com
530 domain-matches .c.com, but not the reverse.
531
532 """
533 # Note that, if A or B are IP addresses, the only relevant part of the
534 # definition of the domain-match algorithm is the direct string-compare.
535 A = A.lower()
536 B = B.lower()
537 if A == B:
538 return True
539 if not is_HDN(A):
540 return False
541 i = A.rfind(B)
542 if i == -1 or i == 0:
543 # A does not have form NB, or N is the empty string
544 return False
545 if not B.startswith("."):
546 return False
547 if not is_HDN(B[1:]):
548 return False
549 return True
550
551def liberal_is_HDN(text):
552 """Return True if text is a sort-of-like a host domain name.
553
554 For accepting/blocking domains.
555
556 """
557 if IPV4_RE.search(text):
558 return False
559 return True
560
561def user_domain_match(A, B):
562 """For blocking/accepting domains.
563
564 A and B may be host domain names or IP addresses.
565
566 """
567 A = A.lower()
568 B = B.lower()
569 if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
570 if A == B:
571 # equal IP addresses
572 return True
573 return False
574 initial_dot = B.startswith(".")
575 if initial_dot and A.endswith(B):
576 return True
577 if not initial_dot and A == B:
578 return True
579 return False
580
581cut_port_re = re.compile(r":\d+$")
582def request_host(request):
583 """Return request-host, as defined by RFC 2965.
584
585 Variation from RFC: returned value is lowercased, for convenient
586 comparison.
587
588 """
589 url = request.get_full_url()
590 host = urlparse.urlparse(url)[1]
591 if host == "":
592 host = request.get_header("Host", "")
593
594 # remove port, if present
595 host = cut_port_re.sub("", host, 1)
596 return host.lower()
597
598def eff_request_host(request):
599 """Return a tuple (request-host, effective request-host name).
600
601 As defined by RFC 2965, except both are lowercased.
602
603 """
604 erhn = req_host = request_host(request)
605 if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
606 erhn = req_host + ".local"
607 return req_host, erhn
608
609def request_path(request):
Gregory P. Smith2a91ce82010-07-25 19:11:36 +0000610 """Path component of request-URI, as defined by RFC 2965."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000611 url = request.get_full_url()
Gregory P. Smith2a91ce82010-07-25 19:11:36 +0000612 parts = urlparse.urlsplit(url)
613 path = escape_path(parts.path)
614 if not path.startswith("/"):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000615 # fix bad RFC 2396 absoluteURI
Gregory P. Smith2a91ce82010-07-25 19:11:36 +0000616 path = "/" + path
617 return path
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000618
619def request_port(request):
620 host = request.get_host()
621 i = host.find(':')
622 if i >= 0:
623 port = host[i+1:]
624 try:
625 int(port)
626 except ValueError:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000627 _debug("nonnumeric port: '%s'", port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000628 return None
629 else:
630 port = DEFAULT_HTTP_PORT
631 return port
632
633# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
634# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
635HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
636ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
637def uppercase_escaped_char(match):
638 return "%%%s" % match.group(1).upper()
639def escape_path(path):
640 """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
641 # There's no knowing what character encoding was used to create URLs
642 # containing %-escapes, but since we have to pick one to escape invalid
643 # path characters, we pick UTF-8, as recommended in the HTML 4.0
644 # specification:
645 # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
646 # And here, kind of: draft-fielding-uri-rfc2396bis-03
647 # (And in draft IRI specification: draft-duerst-iri-05)
648 # (And here, for new URI schemes: RFC 2718)
Neal Norwitz2fa0b9d2004-10-17 16:23:52 +0000649 if isinstance(path, unicode):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000650 path = path.encode("utf-8")
651 path = urllib.quote(path, HTTP_PATH_SAFE)
652 path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
653 return path
654
655def reach(h):
656 """Return reach of host h, as defined by RFC 2965, section 1.
657
658 The reach R of a host name H is defined as follows:
659
660 * If
661
662 - H is the host domain name of a host; and,
663
664 - H has the form A.B; and
665
666 - A has no embedded (that is, interior) dots; and
667
668 - B has at least one embedded dot, or B is the string "local".
669 then the reach of H is .B.
670
671 * Otherwise, the reach of H is H.
672
673 >>> reach("www.acme.com")
674 '.acme.com'
675 >>> reach("acme.com")
676 'acme.com'
677 >>> reach("acme.local")
678 '.local'
679
680 """
681 i = h.find(".")
682 if i >= 0:
683 #a = h[:i] # this line is only here to show what a is
684 b = h[i+1:]
685 i = b.find(".")
686 if is_HDN(h) and (i >= 0 or b == "local"):
687 return "."+b
688 return h
689
690def is_third_party(request):
691 """
692
693 RFC 2965, section 3.3.6:
694
695 An unverifiable transaction is to a third-party host if its request-
696 host U does not domain-match the reach R of the request-host O in the
697 origin transaction.
698
699 """
700 req_host = request_host(request)
701 if not domain_match(req_host, reach(request.get_origin_req_host())):
702 return True
703 else:
704 return False
705
706
707class Cookie:
708 """HTTP Cookie.
709
710 This class represents both Netscape and RFC 2965 cookies.
711
712 This is deliberately a very simple class. It just holds attributes. It's
713 possible to construct Cookie instances that don't comply with the cookie
714 standards. CookieJar.make_cookies is the factory function for Cookie
715 objects -- it deals with cookie parsing, supplying defaults, and
716 normalising to the representation used in this class. CookiePolicy is
717 responsible for checking them to see whether they should be accepted from
718 and returned to the server.
719
720 Note that the port may be present in the headers, but unspecified ("Port"
721 rather than"Port=80", for example); if this is the case, port is None.
722
723 """
724
725 def __init__(self, version, name, value,
726 port, port_specified,
727 domain, domain_specified, domain_initial_dot,
728 path, path_specified,
729 secure,
730 expires,
731 discard,
732 comment,
733 comment_url,
Neal Norwitz71dad722005-12-23 21:43:48 +0000734 rest,
735 rfc2109=False,
736 ):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000737
738 if version is not None: version = int(version)
739 if expires is not None: expires = int(expires)
740 if port is None and port_specified is True:
741 raise ValueError("if port is None, port_specified must be false")
742
743 self.version = version
744 self.name = name
745 self.value = value
746 self.port = port
747 self.port_specified = port_specified
748 # normalise case, as per RFC 2965 section 3.3.3
749 self.domain = domain.lower()
750 self.domain_specified = domain_specified
751 # Sigh. We need to know whether the domain given in the
752 # cookie-attribute had an initial dot, in order to follow RFC 2965
753 # (as clarified in draft errata). Needed for the returned $Domain
754 # value.
755 self.domain_initial_dot = domain_initial_dot
756 self.path = path
757 self.path_specified = path_specified
758 self.secure = secure
759 self.expires = expires
760 self.discard = discard
761 self.comment = comment
762 self.comment_url = comment_url
Neal Norwitz71dad722005-12-23 21:43:48 +0000763 self.rfc2109 = rfc2109
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000764
765 self._rest = copy.copy(rest)
766
767 def has_nonstandard_attr(self, name):
768 return name in self._rest
769 def get_nonstandard_attr(self, name, default=None):
770 return self._rest.get(name, default)
771 def set_nonstandard_attr(self, name, value):
772 self._rest[name] = value
773
774 def is_expired(self, now=None):
775 if now is None: now = time.time()
776 if (self.expires is not None) and (self.expires <= now):
777 return True
778 return False
779
780 def __str__(self):
781 if self.port is None: p = ""
782 else: p = ":"+self.port
783 limit = self.domain + p + self.path
784 if self.value is not None:
785 namevalue = "%s=%s" % (self.name, self.value)
786 else:
787 namevalue = self.name
788 return "<Cookie %s for %s>" % (namevalue, limit)
789
790 def __repr__(self):
791 args = []
Raymond Hettingerdbecd932005-02-06 06:57:08 +0000792 for name in ("version", "name", "value",
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000793 "port", "port_specified",
794 "domain", "domain_specified", "domain_initial_dot",
795 "path", "path_specified",
796 "secure", "expires", "discard", "comment", "comment_url",
Raymond Hettingerdbecd932005-02-06 06:57:08 +0000797 ):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000798 attr = getattr(self, name)
799 args.append("%s=%s" % (name, repr(attr)))
800 args.append("rest=%s" % repr(self._rest))
Neal Norwitz71dad722005-12-23 21:43:48 +0000801 args.append("rfc2109=%s" % repr(self.rfc2109))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000802 return "Cookie(%s)" % ", ".join(args)
803
804
805class CookiePolicy:
806 """Defines which cookies get accepted from and returned to server.
807
808 May also modify cookies, though this is probably a bad idea.
809
810 The subclass DefaultCookiePolicy defines the standard rules for Netscape
811 and RFC 2965 cookies -- override that if you want a customised policy.
812
813 """
814 def set_ok(self, cookie, request):
815 """Return true if (and only if) cookie should be accepted from server.
816
817 Currently, pre-expired cookies never get this far -- the CookieJar
818 class deletes such cookies itself.
819
820 """
821 raise NotImplementedError()
822
823 def return_ok(self, cookie, request):
824 """Return true if (and only if) cookie should be returned to server."""
825 raise NotImplementedError()
826
827 def domain_return_ok(self, domain, request):
828 """Return false if cookies should not be returned, given cookie domain.
829 """
830 return True
831
832 def path_return_ok(self, path, request):
833 """Return false if cookies should not be returned, given cookie path.
834 """
835 return True
836
837
838class DefaultCookiePolicy(CookiePolicy):
839 """Implements the standard rules for accepting and returning cookies."""
840
841 DomainStrictNoDots = 1
842 DomainStrictNonDomain = 2
843 DomainRFC2965Match = 4
844
845 DomainLiberal = 0
846 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
847
848 def __init__(self,
849 blocked_domains=None, allowed_domains=None,
850 netscape=True, rfc2965=False,
Neal Norwitz71dad722005-12-23 21:43:48 +0000851 rfc2109_as_netscape=None,
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000852 hide_cookie2=False,
853 strict_domain=False,
854 strict_rfc2965_unverifiable=True,
855 strict_ns_unverifiable=False,
856 strict_ns_domain=DomainLiberal,
857 strict_ns_set_initial_dollar=False,
858 strict_ns_set_path=False,
859 ):
860 """Constructor arguments should be passed as keyword arguments only."""
861 self.netscape = netscape
862 self.rfc2965 = rfc2965
Neal Norwitz71dad722005-12-23 21:43:48 +0000863 self.rfc2109_as_netscape = rfc2109_as_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000864 self.hide_cookie2 = hide_cookie2
865 self.strict_domain = strict_domain
866 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
867 self.strict_ns_unverifiable = strict_ns_unverifiable
868 self.strict_ns_domain = strict_ns_domain
869 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
870 self.strict_ns_set_path = strict_ns_set_path
871
872 if blocked_domains is not None:
873 self._blocked_domains = tuple(blocked_domains)
874 else:
875 self._blocked_domains = ()
876
877 if allowed_domains is not None:
878 allowed_domains = tuple(allowed_domains)
879 self._allowed_domains = allowed_domains
880
881 def blocked_domains(self):
882 """Return the sequence of blocked domains (as a tuple)."""
883 return self._blocked_domains
884 def set_blocked_domains(self, blocked_domains):
885 """Set the sequence of blocked domains."""
886 self._blocked_domains = tuple(blocked_domains)
887
888 def is_blocked(self, domain):
889 for blocked_domain in self._blocked_domains:
890 if user_domain_match(domain, blocked_domain):
891 return True
892 return False
893
894 def allowed_domains(self):
895 """Return None, or the sequence of allowed domains (as a tuple)."""
896 return self._allowed_domains
897 def set_allowed_domains(self, allowed_domains):
898 """Set the sequence of allowed domains, or None."""
899 if allowed_domains is not None:
900 allowed_domains = tuple(allowed_domains)
901 self._allowed_domains = allowed_domains
902
903 def is_not_allowed(self, domain):
904 if self._allowed_domains is None:
905 return False
906 for allowed_domain in self._allowed_domains:
907 if user_domain_match(domain, allowed_domain):
908 return False
909 return True
910
911 def set_ok(self, cookie, request):
912 """
913 If you override .set_ok(), be sure to call this method. If it returns
914 false, so should your subclass (assuming your subclass wants to be more
915 strict about which cookies to accept).
916
917 """
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000918 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000919
920 assert cookie.name is not None
921
922 for n in "version", "verifiability", "name", "path", "domain", "port":
923 fn_name = "set_ok_"+n
924 fn = getattr(self, fn_name)
925 if not fn(cookie, request):
926 return False
927
928 return True
929
930 def set_ok_version(self, cookie, request):
931 if cookie.version is None:
932 # Version is always set to 0 by parse_ns_headers if it's a Netscape
933 # cookie, so this must be an invalid RFC 2965 cookie.
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000934 _debug(" Set-Cookie2 without version attribute (%s=%s)",
935 cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000936 return False
937 if cookie.version > 0 and not self.rfc2965:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000938 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000939 return False
940 elif cookie.version == 0 and not self.netscape:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000941 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000942 return False
943 return True
944
945 def set_ok_verifiability(self, cookie, request):
946 if request.is_unverifiable() and is_third_party(request):
947 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000948 _debug(" third-party RFC 2965 cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000949 "unverifiable transaction")
950 return False
951 elif cookie.version == 0 and self.strict_ns_unverifiable:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000952 _debug(" third-party Netscape cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000953 "unverifiable transaction")
954 return False
955 return True
956
957 def set_ok_name(self, cookie, request):
958 # Try and stop servers setting V0 cookies designed to hack other
959 # servers that know both V0 and V1 protocols.
960 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
961 cookie.name.startswith("$")):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000962 _debug(" illegal name (starts with '$'): '%s'", cookie.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000963 return False
964 return True
965
966 def set_ok_path(self, cookie, request):
967 if cookie.path_specified:
968 req_path = request_path(request)
969 if ((cookie.version > 0 or
970 (cookie.version == 0 and self.strict_ns_set_path)) and
971 not req_path.startswith(cookie.path)):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000972 _debug(" path attribute %s is not a prefix of request "
973 "path %s", cookie.path, req_path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000974 return False
975 return True
976
977 def set_ok_domain(self, cookie, request):
978 if self.is_blocked(cookie.domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000979 _debug(" domain %s is in user block-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000980 return False
981 if self.is_not_allowed(cookie.domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +0000982 _debug(" domain %s is not in user allow-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000983 return False
984 if cookie.domain_specified:
985 req_host, erhn = eff_request_host(request)
986 domain = cookie.domain
987 if self.strict_domain and (domain.count(".") >= 2):
Georg Brandle58334a2006-05-07 20:44:34 +0000988 # XXX This should probably be compared with the Konqueror
989 # (kcookiejar.cpp) and Mozilla implementations, but it's a
990 # losing battle.
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000991 i = domain.rfind(".")
992 j = domain.rfind(".", 0, i)
993 if j == 0: # domain like .foo.bar
994 tld = domain[i+1:]
995 sld = domain[j+1:i]
Georg Brandle58334a2006-05-07 20:44:34 +0000996 if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
997 "gov", "mil", "int", "aero", "biz", "cat", "coop",
998 "info", "jobs", "mobi", "museum", "name", "pro",
999 "travel", "eu") and len(tld) == 2:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001000 # domain like .co.uk
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001001 _debug(" country-code second level domain %s", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001002 return False
1003 if domain.startswith("."):
1004 undotted_domain = domain[1:]
1005 else:
1006 undotted_domain = domain
1007 embedded_dots = (undotted_domain.find(".") >= 0)
1008 if not embedded_dots and domain != ".local":
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001009 _debug(" non-local domain %s contains no embedded dot",
1010 domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001011 return False
1012 if cookie.version == 0:
1013 if (not erhn.endswith(domain) and
1014 (not erhn.startswith(".") and
1015 not ("."+erhn).endswith(domain))):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001016 _debug(" effective request-host %s (even with added "
1017 "initial dot) does not end end with %s",
1018 erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001019 return False
1020 if (cookie.version > 0 or
1021 (self.strict_ns_domain & self.DomainRFC2965Match)):
1022 if not domain_match(erhn, domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001023 _debug(" effective request-host %s does not domain-match "
1024 "%s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001025 return False
1026 if (cookie.version > 0 or
1027 (self.strict_ns_domain & self.DomainStrictNoDots)):
1028 host_prefix = req_host[:-len(domain)]
1029 if (host_prefix.find(".") >= 0 and
1030 not IPV4_RE.search(req_host)):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001031 _debug(" host prefix %s for domain %s contains a dot",
1032 host_prefix, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001033 return False
1034 return True
1035
1036 def set_ok_port(self, cookie, request):
1037 if cookie.port_specified:
1038 req_port = request_port(request)
1039 if req_port is None:
1040 req_port = "80"
1041 else:
1042 req_port = str(req_port)
1043 for p in cookie.port.split(","):
1044 try:
1045 int(p)
1046 except ValueError:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001047 _debug(" bad port %s (not numeric)", p)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001048 return False
1049 if p == req_port:
1050 break
1051 else:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001052 _debug(" request port (%s) not found in %s",
1053 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001054 return False
1055 return True
1056
1057 def return_ok(self, cookie, request):
1058 """
1059 If you override .return_ok(), be sure to call this method. If it
1060 returns false, so should your subclass (assuming your subclass wants to
1061 be more strict about which cookies to return).
1062
1063 """
1064 # Path has already been checked by .path_return_ok(), and domain
1065 # blocking done by .domain_return_ok().
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001066 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001067
1068 for n in "version", "verifiability", "secure", "expires", "port", "domain":
1069 fn_name = "return_ok_"+n
1070 fn = getattr(self, fn_name)
1071 if not fn(cookie, request):
1072 return False
1073 return True
1074
1075 def return_ok_version(self, cookie, request):
1076 if cookie.version > 0 and not self.rfc2965:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001077 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001078 return False
1079 elif cookie.version == 0 and not self.netscape:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001080 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001081 return False
1082 return True
1083
1084 def return_ok_verifiability(self, cookie, request):
1085 if request.is_unverifiable() and is_third_party(request):
1086 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001087 _debug(" third-party RFC 2965 cookie during unverifiable "
1088 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001089 return False
1090 elif cookie.version == 0 and self.strict_ns_unverifiable:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001091 _debug(" third-party Netscape cookie during unverifiable "
1092 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001093 return False
1094 return True
1095
1096 def return_ok_secure(self, cookie, request):
1097 if cookie.secure and request.get_type() != "https":
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001098 _debug(" secure cookie with non-secure request")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001099 return False
1100 return True
1101
1102 def return_ok_expires(self, cookie, request):
1103 if cookie.is_expired(self._now):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001104 _debug(" cookie expired")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001105 return False
1106 return True
1107
1108 def return_ok_port(self, cookie, request):
1109 if cookie.port:
1110 req_port = request_port(request)
1111 if req_port is None:
1112 req_port = "80"
1113 for p in cookie.port.split(","):
1114 if p == req_port:
1115 break
1116 else:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001117 _debug(" request port %s does not match cookie port %s",
1118 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001119 return False
1120 return True
1121
1122 def return_ok_domain(self, cookie, request):
1123 req_host, erhn = eff_request_host(request)
1124 domain = cookie.domain
1125
1126 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1127 if (cookie.version == 0 and
1128 (self.strict_ns_domain & self.DomainStrictNonDomain) and
1129 not cookie.domain_specified and domain != erhn):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001130 _debug(" cookie with unspecified domain does not string-compare "
1131 "equal to request domain")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001132 return False
1133
1134 if cookie.version > 0 and not domain_match(erhn, domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001135 _debug(" effective request-host name %s does not domain-match "
1136 "RFC 2965 cookie domain %s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001137 return False
1138 if cookie.version == 0 and not ("."+erhn).endswith(domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001139 _debug(" request-host %s does not match Netscape cookie domain "
1140 "%s", req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001141 return False
1142 return True
1143
1144 def domain_return_ok(self, domain, request):
1145 # Liberal check of. This is here as an optimization to avoid
1146 # having to load lots of MSIE cookie files unless necessary.
1147 req_host, erhn = eff_request_host(request)
1148 if not req_host.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001149 req_host = "."+req_host
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001150 if not erhn.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001151 erhn = "."+erhn
1152 if not (req_host.endswith(domain) or erhn.endswith(domain)):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001153 #_debug(" request domain %s does not match cookie domain %s",
1154 # req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001155 return False
1156
1157 if self.is_blocked(domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001158 _debug(" domain %s is in user block-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001159 return False
1160 if self.is_not_allowed(domain):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001161 _debug(" domain %s is not in user allow-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001162 return False
1163
1164 return True
1165
1166 def path_return_ok(self, path, request):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001167 _debug("- checking cookie path=%s", path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001168 req_path = request_path(request)
1169 if not req_path.startswith(path):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001170 _debug(" %s does not path-match %s", req_path, path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001171 return False
1172 return True
1173
1174
1175def vals_sorted_by_key(adict):
1176 keys = adict.keys()
1177 keys.sort()
1178 return map(adict.get, keys)
1179
1180def deepvalues(mapping):
1181 """Iterates over nested mapping, depth-first, in sorted order by key."""
1182 values = vals_sorted_by_key(mapping)
1183 for obj in values:
1184 mapping = False
1185 try:
1186 obj.items
1187 except AttributeError:
1188 pass
1189 else:
1190 mapping = True
1191 for subobj in deepvalues(obj):
1192 yield subobj
1193 if not mapping:
1194 yield obj
1195
1196
1197# Used as second parameter to dict.get() method, to distinguish absent
1198# dict key from one with a None value.
1199class Absent: pass
1200
1201class CookieJar:
1202 """Collection of HTTP cookies.
1203
1204 You may not need to know about this class: try
1205 urllib2.build_opener(HTTPCookieProcessor).open(url).
1206
1207 """
1208
1209 non_word_re = re.compile(r"\W")
1210 quote_re = re.compile(r"([\"\\])")
1211 strict_domain_re = re.compile(r"\.?[^.]*")
1212 domain_re = re.compile(r"[^.]*")
1213 dots_re = re.compile(r"^\.+")
1214
1215 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1216
1217 def __init__(self, policy=None):
1218 if policy is None:
1219 policy = DefaultCookiePolicy()
1220 self._policy = policy
1221
1222 self._cookies_lock = _threading.RLock()
1223 self._cookies = {}
1224
1225 def set_policy(self, policy):
1226 self._policy = policy
1227
1228 def _cookies_for_domain(self, domain, request):
1229 cookies = []
1230 if not self._policy.domain_return_ok(domain, request):
1231 return []
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001232 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001233 cookies_by_path = self._cookies[domain]
1234 for path in cookies_by_path.keys():
1235 if not self._policy.path_return_ok(path, request):
1236 continue
1237 cookies_by_name = cookies_by_path[path]
1238 for cookie in cookies_by_name.values():
1239 if not self._policy.return_ok(cookie, request):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001240 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001241 continue
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001242 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001243 cookies.append(cookie)
1244 return cookies
1245
1246 def _cookies_for_request(self, request):
1247 """Return a list of cookies to be returned to server."""
1248 cookies = []
1249 for domain in self._cookies.keys():
1250 cookies.extend(self._cookies_for_domain(domain, request))
1251 return cookies
1252
1253 def _cookie_attrs(self, cookies):
1254 """Return a list of cookie-attributes to be returned to server.
1255
1256 like ['foo="bar"; $Path="/"', ...]
1257
1258 The $Version attribute is also added when appropriate (currently only
1259 once per request).
1260
1261 """
1262 # add cookies in order of most specific (ie. longest) path first
Brett Cannon52f03c52008-08-03 22:34:25 +00001263 cookies.sort(key=lambda arg: len(arg.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001264
1265 version_set = False
1266
1267 attrs = []
1268 for cookie in cookies:
1269 # set version of Cookie header
1270 # XXX
1271 # What should it be if multiple matching Set-Cookie headers have
1272 # different versions themselves?
1273 # Answer: there is no answer; was supposed to be settled by
1274 # RFC 2965 errata, but that may never appear...
1275 version = cookie.version
1276 if not version_set:
1277 version_set = True
1278 if version > 0:
1279 attrs.append("$Version=%s" % version)
1280
1281 # quote cookie value if necessary
1282 # (not for Netscape protocol, which already has any quotes
1283 # intact, due to the poorly-specified Netscape Cookie: syntax)
1284 if ((cookie.value is not None) and
1285 self.non_word_re.search(cookie.value) and version > 0):
1286 value = self.quote_re.sub(r"\\\1", cookie.value)
1287 else:
1288 value = cookie.value
1289
1290 # add cookie-attributes to be returned in Cookie header
1291 if cookie.value is None:
1292 attrs.append(cookie.name)
1293 else:
1294 attrs.append("%s=%s" % (cookie.name, value))
1295 if version > 0:
1296 if cookie.path_specified:
1297 attrs.append('$Path="%s"' % cookie.path)
1298 if cookie.domain.startswith("."):
1299 domain = cookie.domain
1300 if (not cookie.domain_initial_dot and
1301 domain.startswith(".")):
1302 domain = domain[1:]
1303 attrs.append('$Domain="%s"' % domain)
1304 if cookie.port is not None:
1305 p = "$Port"
1306 if cookie.port_specified:
1307 p = p + ('="%s"' % cookie.port)
1308 attrs.append(p)
1309
1310 return attrs
1311
1312 def add_cookie_header(self, request):
1313 """Add correct Cookie: header to request (urllib2.Request object).
1314
1315 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1316
1317 """
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001318 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001319 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001320 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001321
Tim Petersf733abb2007-01-30 03:03:46 +00001322 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001323
Tim Petersf733abb2007-01-30 03:03:46 +00001324 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001325
Tim Petersf733abb2007-01-30 03:03:46 +00001326 attrs = self._cookie_attrs(cookies)
1327 if attrs:
1328 if not request.has_header("Cookie"):
1329 request.add_unredirected_header(
1330 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001331
Tim Petersf733abb2007-01-30 03:03:46 +00001332 # if necessary, advertise that we know RFC 2965
1333 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1334 not request.has_header("Cookie2")):
1335 for cookie in cookies:
1336 if cookie.version != 1:
1337 request.add_unredirected_header("Cookie2", '$Version="1"')
1338 break
1339
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001340 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001341 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001342
1343 self.clear_expired_cookies()
1344
1345 def _normalized_cookie_tuples(self, attrs_set):
1346 """Return list of tuples containing normalised cookie information.
1347
1348 attrs_set is the list of lists of key,value pairs extracted from
1349 the Set-Cookie or Set-Cookie2 headers.
1350
1351 Tuples are name, value, standard, rest, where name and value are the
1352 cookie name and value, standard is a dictionary containing the standard
1353 cookie-attributes (discard, secure, version, expires or max-age,
1354 domain, path and port) and rest is a dictionary containing the rest of
1355 the cookie-attributes.
1356
1357 """
1358 cookie_tuples = []
1359
1360 boolean_attrs = "discard", "secure"
1361 value_attrs = ("version",
1362 "expires", "max-age",
1363 "domain", "path", "port",
1364 "comment", "commenturl")
1365
1366 for cookie_attrs in attrs_set:
1367 name, value = cookie_attrs[0]
1368
1369 # Build dictionary of standard cookie-attributes (standard) and
1370 # dictionary of other cookie-attributes (rest).
1371
1372 # Note: expiry time is normalised to seconds since epoch. V0
1373 # cookies should have the Expires cookie-attribute, and V1 cookies
1374 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1375 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1376 # accept either (but prefer Max-Age).
1377 max_age_set = False
1378
1379 bad_cookie = False
1380
1381 standard = {}
1382 rest = {}
1383 for k, v in cookie_attrs[1:]:
1384 lc = k.lower()
1385 # don't lose case distinction for unknown fields
1386 if lc in value_attrs or lc in boolean_attrs:
1387 k = lc
1388 if k in boolean_attrs and v is None:
1389 # boolean cookie-attribute is present, but has no value
1390 # (like "discard", rather than "port=80")
1391 v = True
1392 if k in standard:
1393 # only first value is significant
1394 continue
1395 if k == "domain":
1396 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001397 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001398 bad_cookie = True
1399 break
1400 # RFC 2965 section 3.3.3
1401 v = v.lower()
1402 if k == "expires":
1403 if max_age_set:
1404 # Prefer max-age to expires (like Mozilla)
1405 continue
1406 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001407 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001408 "attribute: treating as session cookie")
1409 continue
1410 if k == "max-age":
1411 max_age_set = True
1412 try:
1413 v = int(v)
1414 except ValueError:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001415 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001416 "max-age attribute")
1417 bad_cookie = True
1418 break
1419 # convert RFC 2965 Max-Age to seconds since epoch
1420 # XXX Strictly you're supposed to follow RFC 2616
1421 # age-calculation rules. Remember that zero Max-Age is a
1422 # is a request to discard (old and new) cookie, though.
1423 k = "expires"
1424 v = self._now + v
1425 if (k in value_attrs) or (k in boolean_attrs):
1426 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001427 k not in ("port", "comment", "commenturl")):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001428 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001429 bad_cookie = True
1430 break
1431 standard[k] = v
1432 else:
1433 rest[k] = v
1434
1435 if bad_cookie:
1436 continue
1437
1438 cookie_tuples.append((name, value, standard, rest))
1439
1440 return cookie_tuples
1441
1442 def _cookie_from_cookie_tuple(self, tup, request):
1443 # standard is dict of standard cookie-attributes, rest is dict of the
1444 # rest of them
1445 name, value, standard, rest = tup
1446
1447 domain = standard.get("domain", Absent)
1448 path = standard.get("path", Absent)
1449 port = standard.get("port", Absent)
1450 expires = standard.get("expires", Absent)
1451
1452 # set the easy defaults
1453 version = standard.get("version", None)
Georg Brandl5d0ca2c2010-05-22 11:29:19 +00001454 if version is not None:
1455 try:
1456 version = int(version)
1457 except ValueError:
1458 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001459 secure = standard.get("secure", False)
1460 # (discard is also set if expires is Absent)
1461 discard = standard.get("discard", False)
1462 comment = standard.get("comment", None)
1463 comment_url = standard.get("commenturl", None)
1464
1465 # set default path
1466 if path is not Absent and path != "":
1467 path_specified = True
1468 path = escape_path(path)
1469 else:
1470 path_specified = False
1471 path = request_path(request)
1472 i = path.rfind("/")
1473 if i != -1:
1474 if version == 0:
1475 # Netscape spec parts company from reality here
1476 path = path[:i]
1477 else:
1478 path = path[:i+1]
1479 if len(path) == 0: path = "/"
1480
1481 # set default domain
1482 domain_specified = domain is not Absent
1483 # but first we have to remember whether it starts with a dot
1484 domain_initial_dot = False
1485 if domain_specified:
1486 domain_initial_dot = bool(domain.startswith("."))
1487 if domain is Absent:
1488 req_host, erhn = eff_request_host(request)
1489 domain = erhn
1490 elif not domain.startswith("."):
1491 domain = "."+domain
1492
1493 # set default port
1494 port_specified = False
1495 if port is not Absent:
1496 if port is None:
1497 # Port attr present, but has no value: default to request port.
1498 # Cookie should then only be sent back on that port.
1499 port = request_port(request)
1500 else:
1501 port_specified = True
1502 port = re.sub(r"\s+", "", port)
1503 else:
1504 # No port attr present. Cookie can be sent back on any port.
1505 port = None
1506
1507 # set default expires and discard
1508 if expires is Absent:
1509 expires = None
1510 discard = True
1511 elif expires <= self._now:
1512 # Expiry date in past is request to delete cookie. This can't be
1513 # in DefaultCookiePolicy, because can't delete cookies there.
1514 try:
1515 self.clear(domain, path, name)
1516 except KeyError:
1517 pass
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001518 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1519 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001520 return None
1521
1522 return Cookie(version,
1523 name, value,
1524 port, port_specified,
1525 domain, domain_specified, domain_initial_dot,
1526 path, path_specified,
1527 secure,
1528 expires,
1529 discard,
1530 comment,
1531 comment_url,
1532 rest)
1533
1534 def _cookies_from_attrs_set(self, attrs_set, request):
1535 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1536
1537 cookies = []
1538 for tup in cookie_tuples:
1539 cookie = self._cookie_from_cookie_tuple(tup, request)
1540 if cookie: cookies.append(cookie)
1541 return cookies
1542
Neal Norwitz71dad722005-12-23 21:43:48 +00001543 def _process_rfc2109_cookies(self, cookies):
1544 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1545 if rfc2109_as_ns is None:
1546 rfc2109_as_ns = not self._policy.rfc2965
1547 for cookie in cookies:
1548 if cookie.version == 1:
1549 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001550 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001551 # treat 2109 cookies as Netscape cookies rather than
1552 # as RFC2965 cookies
1553 cookie.version = 0
1554
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001555 def make_cookies(self, response, request):
1556 """Return sequence of Cookie objects extracted from response object."""
1557 # get cookie-attributes for RFC 2965 and Netscape protocols
1558 headers = response.info()
1559 rfc2965_hdrs = headers.getheaders("Set-Cookie2")
1560 ns_hdrs = headers.getheaders("Set-Cookie")
1561
1562 rfc2965 = self._policy.rfc2965
1563 netscape = self._policy.netscape
1564
1565 if ((not rfc2965_hdrs and not ns_hdrs) or
1566 (not ns_hdrs and not rfc2965) or
1567 (not rfc2965_hdrs and not netscape) or
1568 (not netscape and not rfc2965)):
1569 return [] # no relevant cookie headers: quick exit
1570
1571 try:
1572 cookies = self._cookies_from_attrs_set(
1573 split_header_words(rfc2965_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001574 except Exception:
1575 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001576 cookies = []
1577
1578 if ns_hdrs and netscape:
1579 try:
Neal Norwitz71dad722005-12-23 21:43:48 +00001580 # RFC 2109 and Netscape cookies
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001581 ns_cookies = self._cookies_from_attrs_set(
1582 parse_ns_headers(ns_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001583 except Exception:
1584 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001585 ns_cookies = []
Neal Norwitz71dad722005-12-23 21:43:48 +00001586 self._process_rfc2109_cookies(ns_cookies)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001587
1588 # Look for Netscape cookies (from Set-Cookie headers) that match
1589 # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1590 # For each match, keep the RFC 2965 cookie and ignore the Netscape
1591 # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1592 # bundled in with the Netscape cookies for this purpose, which is
1593 # reasonable behaviour.
1594 if rfc2965:
1595 lookup = {}
1596 for cookie in cookies:
1597 lookup[(cookie.domain, cookie.path, cookie.name)] = None
1598
1599 def no_matching_rfc2965(ns_cookie, lookup=lookup):
1600 key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1601 return key not in lookup
1602 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1603
1604 if ns_cookies:
1605 cookies.extend(ns_cookies)
1606
1607 return cookies
1608
1609 def set_cookie_if_ok(self, cookie, request):
1610 """Set a cookie if policy says it's OK to do so."""
1611 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001612 try:
1613 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001614
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001615 if self._policy.set_ok(cookie, request):
1616 self.set_cookie(cookie)
Tim Petersf733abb2007-01-30 03:03:46 +00001617
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001618
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001619 finally:
1620 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001621
1622 def set_cookie(self, cookie):
1623 """Set a cookie, without checking whether or not it should be set."""
1624 c = self._cookies
1625 self._cookies_lock.acquire()
1626 try:
1627 if cookie.domain not in c: c[cookie.domain] = {}
1628 c2 = c[cookie.domain]
1629 if cookie.path not in c2: c2[cookie.path] = {}
1630 c3 = c2[cookie.path]
1631 c3[cookie.name] = cookie
1632 finally:
1633 self._cookies_lock.release()
1634
1635 def extract_cookies(self, response, request):
1636 """Extract cookies from response, where allowable given the request."""
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001637 _debug("extract_cookies: %s", response.info())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001638 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001639 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001640 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001641
Tim Petersf733abb2007-01-30 03:03:46 +00001642 for cookie in self.make_cookies(response, request):
1643 if self._policy.set_ok(cookie, request):
1644 _debug(" setting cookie: %s", cookie)
1645 self.set_cookie(cookie)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001646 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001647 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001648
1649 def clear(self, domain=None, path=None, name=None):
1650 """Clear some cookies.
1651
1652 Invoking this method without arguments will clear all cookies. If
1653 given a single argument, only cookies belonging to that domain will be
1654 removed. If given two arguments, cookies belonging to the specified
1655 path within that domain are removed. If given three arguments, then
1656 the cookie with the specified name, path and domain is removed.
1657
1658 Raises KeyError if no matching cookie exists.
1659
1660 """
1661 if name is not None:
1662 if (domain is None) or (path is None):
1663 raise ValueError(
1664 "domain and path must be given to remove a cookie by name")
1665 del self._cookies[domain][path][name]
1666 elif path is not None:
1667 if domain is None:
1668 raise ValueError(
1669 "domain must be given to remove cookies by path")
1670 del self._cookies[domain][path]
1671 elif domain is not None:
1672 del self._cookies[domain]
1673 else:
1674 self._cookies = {}
1675
1676 def clear_session_cookies(self):
1677 """Discard all session cookies.
1678
1679 Note that the .save() method won't save session cookies anyway, unless
1680 you ask otherwise by passing a true ignore_discard argument.
1681
1682 """
1683 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001684 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001685 for cookie in self:
1686 if cookie.discard:
1687 self.clear(cookie.domain, cookie.path, cookie.name)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001688 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001689 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001690
1691 def clear_expired_cookies(self):
1692 """Discard all expired cookies.
1693
1694 You probably don't need to call this method: expired cookies are never
1695 sent back to the server (provided you're using DefaultCookiePolicy),
1696 this method is called by CookieJar itself every so often, and the
1697 .save() method won't save expired cookies anyway (unless you ask
1698 otherwise by passing a true ignore_expires argument).
1699
1700 """
1701 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001702 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001703 now = time.time()
1704 for cookie in self:
1705 if cookie.is_expired(now):
1706 self.clear(cookie.domain, cookie.path, cookie.name)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001707 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001708 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001709
1710 def __iter__(self):
1711 return deepvalues(self._cookies)
1712
1713 def __len__(self):
1714 """Return number of contained cookies."""
1715 i = 0
1716 for cookie in self: i = i + 1
1717 return i
1718
1719 def __repr__(self):
1720 r = []
1721 for cookie in self: r.append(repr(cookie))
1722 return "<%s[%s]>" % (self.__class__, ", ".join(r))
1723
1724 def __str__(self):
1725 r = []
1726 for cookie in self: r.append(str(cookie))
1727 return "<%s[%s]>" % (self.__class__, ", ".join(r))
1728
1729
Neal Norwitz3e7de592005-12-23 21:24:35 +00001730# derives from IOError for backwards-compatibility with Python 2.4.0
1731class LoadError(IOError): pass
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001732
1733class FileCookieJar(CookieJar):
1734 """CookieJar that can be loaded from and saved to a file."""
1735
1736 def __init__(self, filename=None, delayload=False, policy=None):
1737 """
1738 Cookies are NOT loaded from the named file until either the .load() or
1739 .revert() method is called.
1740
1741 """
1742 CookieJar.__init__(self, policy)
1743 if filename is not None:
1744 try:
1745 filename+""
1746 except:
1747 raise ValueError("filename must be string-like")
1748 self.filename = filename
1749 self.delayload = bool(delayload)
1750
1751 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
1752 """Save cookies to a file."""
1753 raise NotImplementedError()
1754
1755 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1756 """Load cookies from a file."""
1757 if filename is None:
1758 if self.filename is not None: filename = self.filename
1759 else: raise ValueError(MISSING_FILENAME_TEXT)
1760
1761 f = open(filename)
1762 try:
1763 self._really_load(f, filename, ignore_discard, ignore_expires)
1764 finally:
1765 f.close()
1766
1767 def revert(self, filename=None,
1768 ignore_discard=False, ignore_expires=False):
1769 """Clear all cookies and reload cookies from a saved file.
1770
1771 Raises LoadError (or IOError) if reversion is not successful; the
1772 object's state will not be altered if this happens.
1773
1774 """
1775 if filename is None:
1776 if self.filename is not None: filename = self.filename
1777 else: raise ValueError(MISSING_FILENAME_TEXT)
1778
1779 self._cookies_lock.acquire()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001780 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001781
Tim Petersf733abb2007-01-30 03:03:46 +00001782 old_state = copy.deepcopy(self._cookies)
1783 self._cookies = {}
1784 try:
1785 self.load(filename, ignore_discard, ignore_expires)
1786 except (LoadError, IOError):
1787 self._cookies = old_state
1788 raise
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001789
1790 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001791 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001792
1793from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
1794from _MozillaCookieJar import MozillaCookieJar