blob: 7fd883fd0950d1210851efb08e4872978b996018 [file] [log] [blame]
"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27
# Public names exported by "from cookielib import *".
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
30
import copy
import re
import time
import urllib
import urlparse
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading
import httplib  # only for the default HTTP port
from calendar import timegm
38
debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() the first time it is needed


def _debug(*args):
    """Pass *args to the debug() method of the "cookielib" logger.

    Does nothing and returns None while the module-level ``debug`` flag is
    false.  The logging module is only imported, and the logger only looked
    up, the first time a message is actually emitted; the logger is then
    cached in the module-level ``logger`` variable.
    """
    global logger
    if not debug:
        return
    if not logger:
        import logging
        logger = logging.getLogger("cookielib")
    return logger.debug(*args)
50
# The default HTTP port, as a string (ports are handled as strings here).
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
55
def _warn_unhandled_exception():
    """Issue a warning carrying the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  This is meant to be
    called from inside such a handler, so that any exception caught there
    is reported rather than silently lost.
    """
    import warnings, traceback
    # format_exc() returns the same text print_exc() would write to a file.
    msg = traceback.format_exc()
    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
# Date/time conversion
# -----------------------------------------------------------------------------

EPOCH_YEAR = 1970


def _timegm(tt):
    """Return seconds since the epoch for the UTC time tuple tt.

    Only the first six items of tt (year, month, day of month, hour,
    minute, second) are range-checked; the tuple itself is handed on to
    calendar.timegm().  None is returned if the year is before EPOCH_YEAR
    or any of the other fields is out of range.
    """
    year, month, mday, hour, min, sec = tt[:6]
    if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
        return timegm(tt)
    return None
78
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copies of the month names, for case-insensitive lookups.
MONTHS_LOWER = []
for month in MONTHS:
    MONTHS_LOWER.append(month.lower())


def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, min, sec)


def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as shown in the docstring above.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
117
118
# Timezone names that mean UTC; only the keys are used.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric offset: optional sign, one or two hour digits, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")


def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by the string tz.

    tz may be one of the names in UTC_ZONES (offset 0) or a numeric offset
    such as "-0800", "+01:00" or "5".  None is returned for anything else.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if not m:
        return None
    sign, hours, minutes = m.groups()
    offset = 3600 * int(hours)
    if minutes:
        offset = offset + 60 * int(minutes)
    if sign == '-':
        offset = -offset
    return offset
135
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date fields to seconds since the epoch, or None.

    day, yr, hr, min and sec are numbers or numeric strings (hr, min and
    sec may be None, meaning 0); mon is a month name from MONTHS (any case)
    or a month number; tz is a timezone string understood by
    offset_from_tz_string(), or None for UTC.

    None is returned if the month or the timezone is not recognised, or if
    _timegm() rejects the resulting time tuple.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE accepts fractional seconds ("29.5"), which a plain int()
    # rejects with ValueError; drop the fraction instead.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the year closest
        # to the current one
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
188
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)


def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        # int (not float) seconds, so that the result is an integer as
        # documented, like the result of the loose path below
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
266
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)


def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
311
312
# Header parsing
# -----------------------------------------------------------------------------

def unmatched(match):
    """Return unmatched part of re.Match object."""
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]

HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    # a bare string would be iterated character by character
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # undo backslash-escaping inside the quotes
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                # every pass of the loop must consume something
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
409
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                # anything other than a plain word has to be quoted
                if not re.search(r"^\w+$", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        # lists with no pairs contribute nothing to the result
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)
435
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns one list of (key, value) pairs per non-empty header.  The first
    pair is the cookie name and value; names of known cookie-attributes are
    lowercased; the value of "expires" is converted with http2time() (None
    if it is invalid or missing); and a ("version", "0") pair is appended
    when the header carries no version attribute.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            # the first parameter is the cookie's own name=value, which
            # must be left exactly as it is
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                # a bare "expires" (no "=") has no value to convert
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    if v.startswith('"'): v = v[1:]
                    if v.endswith('"'): v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
487
488
# Matches text ending in a dot followed by digits, which is how this module
# recognises an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # A must have the form NB, so B has to be a suffix of A -- finding B
    # somewhere in the middle of A is not enough.  N is then necessarily
    # non-empty, because A != B.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
543
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  Anything that IPV4_RE does not
    recognise as an IP address is accepted.

    """
    if IPV4_RE.search(text):
        return False
    return True
553
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If B starts with a dot, A matches when it ends with
    B; otherwise (and always for IP addresses) A matches only when the two
    are equal.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one is an IP address: only equal addresses match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
573
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host is taken from the request's full URL or, if the URL has no
    host part, from the request's Host header (default "").

    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, 1)
    return host.lower()
590
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host has no
    dot in it; otherwise the two are the same.

    """
    erhn = req_host = request_host(request)
    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
601
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, parameters, query and fragment of the request's
    full URL.  The path (with its parameters) is run through escape_path(),
    and the result always starts with "/".
    """
    url = request.get_full_url()
    path, parameters, query, frag = urlparse.urlparse(url)[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/"+req_path
    return req_path
616
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in request.get_host();
    DEFAULT_HTTP_PORT is returned if there is no ":".  None is returned
    (after logging a debug message) if the port is not a number.
    """
    host = request.get_host()
    i = host.find(':')
    if i < 0:
        return DEFAULT_HTTP_PORT
    port = host[i+1:]
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
630
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %XX escape matched by ESCAPED_CHAR_RE, with XX uppercased."""
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    path = urllib.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path
652
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    i = h.find(".")
    if i >= 0:
        # b is everything after the first dot (a, the part before it, is
        # not needed)
        b = h[i+1:]
        i = b.find(".")
        if is_HDN(h) and (i >= 0 or b == "local"):
            return "."+b
    return h
687
def is_third_party(request):
    """Return True if the request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    return not domain_match(req_host, reach(request.get_origin_req_host()))
703
704
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the given cookie-attributes on the instance.

        version and expires are converted to int unless they are None, and
        domain is lowercased.  rest is a mapping of nonstandard
        cookie-attributes; a shallow copy of it is kept.

        Raises ValueError if port is None but port_specified is True.
        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the nonstandard cookie-attribute name is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the nonstandard cookie-attribute name, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the nonstandard cookie-attribute name to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now is in seconds since the epoch and defaults to the current time.
        A cookie whose expires attribute is None never counts as expired.
        """
        if now is None: now = time.time()
        if (self.expires is not None) and (self.expires <= now):
            return True
        return False

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # the nonstandard attributes are stored under a private name, but
        # are passed to the constructor as "rest"
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
801
802
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
834
835
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Bit flags for the strict_ns_domain constructor argument.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    # Ready-made combinations of the flags above.
    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
845
846 def __init__(self,
847 blocked_domains=None, allowed_domains=None,
848 netscape=True, rfc2965=False,
Neal Norwitz71dad722005-12-23 21:43:48 +0000849 rfc2109_as_netscape=None,
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000850 hide_cookie2=False,
851 strict_domain=False,
852 strict_rfc2965_unverifiable=True,
853 strict_ns_unverifiable=False,
854 strict_ns_domain=DomainLiberal,
855 strict_ns_set_initial_dollar=False,
856 strict_ns_set_path=False,
857 ):
858 """Constructor arguments should be passed as keyword arguments only."""
859 self.netscape = netscape
860 self.rfc2965 = rfc2965
Neal Norwitz71dad722005-12-23 21:43:48 +0000861 self.rfc2109_as_netscape = rfc2109_as_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000862 self.hide_cookie2 = hide_cookie2
863 self.strict_domain = strict_domain
864 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
865 self.strict_ns_unverifiable = strict_ns_unverifiable
866 self.strict_ns_domain = strict_ns_domain
867 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
868 self.strict_ns_set_path = strict_ns_set_path
869
870 if blocked_domains is not None:
871 self._blocked_domains = tuple(blocked_domains)
872 else:
873 self._blocked_domains = ()
874
875 if allowed_domains is not None:
876 allowed_domains = tuple(allowed_domains)
877 self._allowed_domains = allowed_domains
878
879 def blocked_domains(self):
880 """Return the sequence of blocked domains (as a tuple)."""
881 return self._blocked_domains
882 def set_blocked_domains(self, blocked_domains):
883 """Set the sequence of blocked domains."""
884 self._blocked_domains = tuple(blocked_domains)
885
886 def is_blocked(self, domain):
887 for blocked_domain in self._blocked_domains:
888 if user_domain_match(domain, blocked_domain):
889 return True
890 return False
891
892 def allowed_domains(self):
893 """Return None, or the sequence of allowed domains (as a tuple)."""
894 return self._allowed_domains
895 def set_allowed_domains(self, allowed_domains):
896 """Set the sequence of allowed domains, or None."""
897 if allowed_domains is not None:
898 allowed_domains = tuple(allowed_domains)
899 self._allowed_domains = allowed_domains
900
901 def is_not_allowed(self, domain):
902 if self._allowed_domains is None:
903 return False
904 for allowed_domain in self._allowed_domains:
905 if user_domain_match(domain, allowed_domain):
906 return False
907 return True
908
909 def set_ok(self, cookie, request):
910 """
911 If you override .set_ok(), be sure to call this method. If it returns
912 false, so should your subclass (assuming your subclass wants to be more
913 strict about which cookies to accept).
914
915 """
Thomas Wouters477c8d52006-05-27 19:21:47 +0000916 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000917
918 assert cookie.name is not None
919
920 for n in "version", "verifiability", "name", "path", "domain", "port":
921 fn_name = "set_ok_"+n
922 fn = getattr(self, fn_name)
923 if not fn(cookie, request):
924 return False
925
926 return True
927
928 def set_ok_version(self, cookie, request):
929 if cookie.version is None:
930 # Version is always set to 0 by parse_ns_headers if it's a Netscape
931 # cookie, so this must be an invalid RFC 2965 cookie.
Thomas Wouters477c8d52006-05-27 19:21:47 +0000932 _debug(" Set-Cookie2 without version attribute (%s=%s)",
933 cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000934 return False
935 if cookie.version > 0 and not self.rfc2965:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000936 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000937 return False
938 elif cookie.version == 0 and not self.netscape:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000939 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000940 return False
941 return True
942
943 def set_ok_verifiability(self, cookie, request):
944 if request.is_unverifiable() and is_third_party(request):
945 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000946 _debug(" third-party RFC 2965 cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000947 "unverifiable transaction")
948 return False
949 elif cookie.version == 0 and self.strict_ns_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000950 _debug(" third-party Netscape cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000951 "unverifiable transaction")
952 return False
953 return True
954
955 def set_ok_name(self, cookie, request):
956 # Try and stop servers setting V0 cookies designed to hack other
957 # servers that know both V0 and V1 protocols.
958 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
959 cookie.name.startswith("$")):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000960 _debug(" illegal name (starts with '$'): '%s'", cookie.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000961 return False
962 return True
963
964 def set_ok_path(self, cookie, request):
965 if cookie.path_specified:
966 req_path = request_path(request)
967 if ((cookie.version > 0 or
968 (cookie.version == 0 and self.strict_ns_set_path)) and
969 not req_path.startswith(cookie.path)):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000970 _debug(" path attribute %s is not a prefix of request "
971 "path %s", cookie.path, req_path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000972 return False
973 return True
974
975 def set_ok_domain(self, cookie, request):
976 if self.is_blocked(cookie.domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000977 _debug(" domain %s is in user block-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000978 return False
979 if self.is_not_allowed(cookie.domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000980 _debug(" domain %s is not in user allow-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000981 return False
982 if cookie.domain_specified:
983 req_host, erhn = eff_request_host(request)
984 domain = cookie.domain
985 if self.strict_domain and (domain.count(".") >= 2):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000986 # XXX This should probably be compared with the Konqueror
987 # (kcookiejar.cpp) and Mozilla implementations, but it's a
988 # losing battle.
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000989 i = domain.rfind(".")
990 j = domain.rfind(".", 0, i)
991 if j == 0: # domain like .foo.bar
992 tld = domain[i+1:]
993 sld = domain[j+1:i]
Thomas Wouters477c8d52006-05-27 19:21:47 +0000994 if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
995 "gov", "mil", "int", "aero", "biz", "cat", "coop",
996 "info", "jobs", "mobi", "museum", "name", "pro",
997 "travel", "eu") and len(tld) == 2:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000998 # domain like .co.uk
Thomas Wouters477c8d52006-05-27 19:21:47 +0000999 _debug(" country-code second level domain %s", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001000 return False
1001 if domain.startswith("."):
1002 undotted_domain = domain[1:]
1003 else:
1004 undotted_domain = domain
1005 embedded_dots = (undotted_domain.find(".") >= 0)
1006 if not embedded_dots and domain != ".local":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001007 _debug(" non-local domain %s contains no embedded dot",
1008 domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001009 return False
1010 if cookie.version == 0:
1011 if (not erhn.endswith(domain) and
1012 (not erhn.startswith(".") and
1013 not ("."+erhn).endswith(domain))):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001014 _debug(" effective request-host %s (even with added "
1015 "initial dot) does not end end with %s",
1016 erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001017 return False
1018 if (cookie.version > 0 or
1019 (self.strict_ns_domain & self.DomainRFC2965Match)):
1020 if not domain_match(erhn, domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001021 _debug(" effective request-host %s does not domain-match "
1022 "%s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001023 return False
1024 if (cookie.version > 0 or
1025 (self.strict_ns_domain & self.DomainStrictNoDots)):
1026 host_prefix = req_host[:-len(domain)]
1027 if (host_prefix.find(".") >= 0 and
1028 not IPV4_RE.search(req_host)):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001029 _debug(" host prefix %s for domain %s contains a dot",
1030 host_prefix, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001031 return False
1032 return True
1033
1034 def set_ok_port(self, cookie, request):
1035 if cookie.port_specified:
1036 req_port = request_port(request)
1037 if req_port is None:
1038 req_port = "80"
1039 else:
1040 req_port = str(req_port)
1041 for p in cookie.port.split(","):
1042 try:
1043 int(p)
1044 except ValueError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001045 _debug(" bad port %s (not numeric)", p)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001046 return False
1047 if p == req_port:
1048 break
1049 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001050 _debug(" request port (%s) not found in %s",
1051 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001052 return False
1053 return True
1054
1055 def return_ok(self, cookie, request):
1056 """
1057 If you override .return_ok(), be sure to call this method. If it
1058 returns false, so should your subclass (assuming your subclass wants to
1059 be more strict about which cookies to return).
1060
1061 """
1062 # Path has already been checked by .path_return_ok(), and domain
1063 # blocking done by .domain_return_ok().
Thomas Wouters477c8d52006-05-27 19:21:47 +00001064 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001065
1066 for n in "version", "verifiability", "secure", "expires", "port", "domain":
1067 fn_name = "return_ok_"+n
1068 fn = getattr(self, fn_name)
1069 if not fn(cookie, request):
1070 return False
1071 return True
1072
1073 def return_ok_version(self, cookie, request):
1074 if cookie.version > 0 and not self.rfc2965:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001076 return False
1077 elif cookie.version == 0 and not self.netscape:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001078 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001079 return False
1080 return True
1081
1082 def return_ok_verifiability(self, cookie, request):
1083 if request.is_unverifiable() and is_third_party(request):
1084 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001085 _debug(" third-party RFC 2965 cookie during unverifiable "
1086 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001087 return False
1088 elif cookie.version == 0 and self.strict_ns_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001089 _debug(" third-party Netscape cookie during unverifiable "
1090 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001091 return False
1092 return True
1093
1094 def return_ok_secure(self, cookie, request):
1095 if cookie.secure and request.get_type() != "https":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001096 _debug(" secure cookie with non-secure request")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001097 return False
1098 return True
1099
1100 def return_ok_expires(self, cookie, request):
1101 if cookie.is_expired(self._now):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001102 _debug(" cookie expired")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001103 return False
1104 return True
1105
1106 def return_ok_port(self, cookie, request):
1107 if cookie.port:
1108 req_port = request_port(request)
1109 if req_port is None:
1110 req_port = "80"
1111 for p in cookie.port.split(","):
1112 if p == req_port:
1113 break
1114 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001115 _debug(" request port %s does not match cookie port %s",
1116 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001117 return False
1118 return True
1119
1120 def return_ok_domain(self, cookie, request):
1121 req_host, erhn = eff_request_host(request)
1122 domain = cookie.domain
1123
1124 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1125 if (cookie.version == 0 and
1126 (self.strict_ns_domain & self.DomainStrictNonDomain) and
1127 not cookie.domain_specified and domain != erhn):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001128 _debug(" cookie with unspecified domain does not string-compare "
1129 "equal to request domain")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001130 return False
1131
1132 if cookie.version > 0 and not domain_match(erhn, domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001133 _debug(" effective request-host name %s does not domain-match "
1134 "RFC 2965 cookie domain %s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001135 return False
1136 if cookie.version == 0 and not ("."+erhn).endswith(domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001137 _debug(" request-host %s does not match Netscape cookie domain "
1138 "%s", req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001139 return False
1140 return True
1141
1142 def domain_return_ok(self, domain, request):
1143 # Liberal check of. This is here as an optimization to avoid
1144 # having to load lots of MSIE cookie files unless necessary.
1145 req_host, erhn = eff_request_host(request)
1146 if not req_host.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001147 req_host = "."+req_host
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001148 if not erhn.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001149 erhn = "."+erhn
1150 if not (req_host.endswith(domain) or erhn.endswith(domain)):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001151 #_debug(" request domain %s does not match cookie domain %s",
1152 # req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001153 return False
1154
1155 if self.is_blocked(domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001156 _debug(" domain %s is in user block-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001157 return False
1158 if self.is_not_allowed(domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001159 _debug(" domain %s is not in user allow-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001160 return False
1161
1162 return True
1163
1164 def path_return_ok(self, path, request):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001165 _debug("- checking cookie path=%s", path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001166 req_path = request_path(request)
1167 if not req_path.startswith(path):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001168 _debug(" %s does not path-match %s", req_path, path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001169 return False
1170 return True
1171
1172
def vals_sorted_by_key(adict):
    """Return the values of adict, ordered by their sorted keys."""
    return map(adict.get, sorted(adict))
1176
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an .items attribute is treated as a nested mapping
    and descended into; every other value is yielded as a leaf.

    """
    for value in vals_sorted_by_key(mapping):
        try:
            value.items
        except AttributeError:
            # Not a mapping: this is a leaf.
            yield value
            continue
        for leaf in deepvalues(value):
            yield leaf
1192
1193
# Sentinel passed as the default to dict.get(), so that a key which is
# missing can be told apart from a key whose value is None.
class Absent:
    pass
1197
class CookieJar:
    """Collection of HTTP cookies.

    Cookies are kept in self._cookies, a nested dictionary keyed by domain,
    then path, then cookie name.  A policy object decides which cookies are
    accepted from responses and which are returned with requests.

    You may not need to know about this class: try
    urllib2.build_opener(HTTPCookieProcessor).open(url).

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"

    def __init__(self, policy=None):
        """Create an empty jar; policy defaults to DefaultCookiePolicy()."""
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the policy object used by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under domain that the policy allows."""
        cookies = []
        if not self._policy.domain_return_ok(domain, request):
            return []
        _debug("Checking %s for cookies to return", domain)
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    _debug(" not returning cookie")
                    continue
                _debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        The cookies list is sorted in place.

        """
        # Add cookies in order of most specific (ie. longest) path first.
        # The sort is stable, so cookies with equally long paths keep their
        # relative order.
        cookies.sort(key=lambda cookie: len(cookie.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        _debug("add_cookie_header")
        self._cookies_lock.acquire()
        try:

            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header("Cookie2", '$Version="1"')
                        break

        finally:
            self._cookies_lock.release()

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        Cookies with a missing domain value, a missing or non-numeric
        max-age, or a missing value for any other value-carrying standard
        attribute (except port, comment and commenturl) are dropped.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug(" missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except (TypeError, ValueError):
                        # TypeError: attribute present without a value
                        # (v is None); ValueError: value is not a number.
                        _debug(" missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ("port", "comment", "commenturl")):
                        _debug(" missing value for %s attribute", k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None when the cookie must be ignored: either its version
        attribute is not an integer, or its expiry time is not in the future.
        In the latter case any stored cookie with the same domain, path and
        name is removed from the jar.

        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                # A malformed version must only cost this one cookie, not
                # every cookie parsed from the same headers.
                _debug(" invalid version attribute %r: ignoring cookie",
                       version)
                return None
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Return Cookie objects for every usable entry of attrs_set."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie: cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version-1 cookies as RFC 2109, optionally downgrading them.

        When the policy's rfc2109_as_netscape attribute is missing or None,
        the downgrade to version 0 happens exactly when RFC 2965 handling is
        switched off in the policy.

        """
        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
        if rfc2109_as_ns is None:
            rfc2109_as_ns = not self._policy.rfc2965
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_ns:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path,
                        ns_cookie.name) not in lookup]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)

        finally:
            self._cookies_lock.release()

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        c = self._cookies
        self._cookies_lock.acquire()
        try:
            if cookie.domain not in c: c[cookie.domain] = {}
            c2 = c[cookie.domain]
            if cookie.path not in c2: c2[cookie.path] = {}
            c3 = c2[cookie.path]
            c3[cookie.name] = cookie
        finally:
            self._cookies_lock.release()

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.  Raises ValueError if
        name is given without domain and path, or path without domain.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        self._cookies_lock.acquire()
        try:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        self._cookies_lock.acquire()
        try:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def __iter__(self):
        """Iterate over every stored cookie, via deepvalues()."""
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i

    def __repr__(self):
        r = [repr(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = [str(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1722
1723
# Subclasses IOError so that code written against Python 2.4.0, which
# caught IOError, keeps working.
class LoadError(IOError):
    pass
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001726
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This class does not define a file format: .save() raises
    NotImplementedError and .load() delegates the parsing to
    self._really_load(), which is not defined in this class.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-type check: anything that can be concatenated with a
            # string is accepted.
            try:
                filename+""
            except Exception:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when
        neither is available.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:

            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except Exception:
                # Whatever went wrong, put the previous cookies back before
                # letting the error propagate.
                self._cookies = old_state
                raise

        finally:
            self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001786
1787from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
1788from _MozillaCookieJar import MozillaCookieJar