blob: 2dd7c48728e0980d89a85424e792ceca9a339b42 [file] [log] [blame]
r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27
Georg Brandle854e762006-05-08 17:48:01 +000028__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
Brett Cannon88f801d2008-08-18 00:46:22 +000029 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
30 'MozillaCookieJar']
Georg Brandle854e762006-05-08 17:48:01 +000031
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000032import re, urlparse, copy, time, urllib
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000033try:
34 import threading as _threading
35except ImportError:
36 import dummy_threading as _threading
37import httplib # only for the default HTTP port
38from calendar import timegm
39
Neal Norwitzb678ce52006-05-18 06:51:46 +000040debug = False # set to True to enable debugging via the logging module
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000041logger = None
42
43def _debug(*args):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000044 if not debug:
45 return
Neal Norwitzb678ce52006-05-18 06:51:46 +000046 global logger
Georg Brandlfeb0a3b2006-05-17 14:45:06 +000047 if not logger:
48 import logging
49 logger = logging.getLogger("cookielib")
50 return logger.debug(*args)
51
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000052
# The standard HTTP port, kept as a string.
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Error text for operations that need a filename but were given none.
MISSING_FILENAME_TEXT = (
    "a filename was not supplied "
    "(nor was the CookieJar instance initialised with one)"
)
56
Georg Brandle854e762006-05-08 17:48:01 +000057def _warn_unhandled_exception():
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000058 # There are a few catch-all except: statements in this module, for
Georg Brandle854e762006-05-08 17:48:01 +000059 # catching input that's bad in unexpected ways. Warn if any
60 # exceptions are caught there.
Andrew M. Kuchlingae40c2f2004-07-10 18:32:12 +000061 import warnings, traceback, StringIO
62 f = StringIO.StringIO()
63 traceback.print_exc(None, f)
64 msg = f.getvalue()
65 warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000066
67
68# Date/time conversion
69# -----------------------------------------------------------------------------
70
71EPOCH_YEAR = 1970
72def _timegm(tt):
73 year, month, mday, hour, min, sec = tt[:6]
74 if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
75 (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
76 return timegm(tt)
77 else:
78 return None
79
# Abbreviated weekday names, Monday first (indexable by tm_wday).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Abbreviated month names, January first.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS, used for case-insensitive month lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]
85
def time2isoz(t=None):
    """Format t (seconds since the epoch) as "YYYY-MM-DD hh:mm:ssZ".

    The current time is used when t is omitted.  The result is expressed in
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # First six fields: year, month, day, hour, minute, second.
    fields = tuple(time.gmtime(t)[:6])
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
102
def time2netscape(t=None):
    """Format t (seconds since the epoch) in the Netscape cookie date style.

    The current time is used when t is omitted.  The returned string looks
    like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    stamp = time.gmtime(t)
    weekday = DAYS[stamp.tm_wday]
    month_name = MONTHS[stamp.tm_mon - 1]   # tm_mon is 1-based
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        weekday, stamp.tm_mday, month_name, stamp.tm_year,
        stamp.tm_hour, stamp.tm_min, stamp.tm_sec)
118
119
# Timezone names treated as zero offset from UTC (only the keys are used).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric zone: optional sign, one or two hour digits, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by tz.

    tz is either one of the names in UTC_ZONES (offset 0) or a numeric zone
    such as "-0800", "+01:00" or "5".  None is returned for anything else.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    sign, hours, minutes = m.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * int(minutes)
    return -seconds if sign == '-' else seconds
136
137def _str2time(day, mon, yr, hr, min, sec, tz):
138 # translate month name to number
139 # month numbers start with 1 (January)
140 try:
141 mon = MONTHS_LOWER.index(mon.lower())+1
142 except ValueError:
143 # maybe it's already a number
144 try:
145 imon = int(mon)
146 except ValueError:
147 return None
148 if 1 <= imon <= 12:
149 mon = imon
150 else:
151 return None
152
153 # make sure clock elements are defined
154 if hr is None: hr = 0
155 if min is None: min = 0
156 if sec is None: sec = 0
157
158 yr = int(yr)
159 day = int(day)
160 hr = int(hr)
161 min = int(min)
162 sec = int(sec)
163
164 if yr < 1000:
165 # find "obvious" year
166 cur_yr = time.localtime(time.time())[0]
167 m = cur_yr % 100
168 tmp = yr
169 yr = yr + cur_yr - m
170 m = m - tmp
171 if abs(m) > 50:
172 if m > 0: yr = yr + 100
173 else: yr = yr - 100
174
175 # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
176 t = _timegm((yr, mon, day, hr, min, sec, tz))
177
178 if t is not None:
179 # adjust time using timezone string, to get absolute time since epoch
180 if tz is None:
181 tz = "UTC"
182 tz = tz.upper()
183 offset = offset_from_tz_string(tz)
184 if offset is None:
185 return None
186 t = t - offset
187
188 return t
189
# Exactly "Wdy, DD Mon YYYY HH:MM:SS GMT"; lets http2time skip loose parsing.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (abbreviated or full), optional comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of text is unrecognized, the time is
    outside the representable range, or the timezone string is not
    recognized.  If the string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        day, mon_name, yr, hr, minute, sec = m.groups()
        try:
            mon = MONTHS_LOWER.index(mon_name.lower()) + 1
        except ValueError:
            # The strict pattern only checks the month's shape (e.g. "Foo"
            # passes); an unknown month is a bad date, not a crash.
            return None
        # All fields are plain digit runs, so the result stays an integer.
        return _timegm((int(yr), mon, int(day),
                        int(hr), int(minute), int(sec)))

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, minute, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)
267
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # loose regexp parse, ignoring leading whitespace
    m = ISO_DATE_RE.search(text.lstrip())
    if m is None:
        return None  # bad format

    # XXX there's an extra bit of the timezone (the eighth group) that is
    # ignored here: is this the right thing to do?
    # tz is time zone specifier string
    yr, mon, day, hr, minute, sec, tz = m.groups()[:7]
    return _str2time(day, mon, yr, hr, minute, sec, tz)
312
313
314# Header parsing
315# -----------------------------------------------------------------------------
316
def unmatched(match):
    """Return the searched string with the matched span cut out.

    match is a regular-expression match object; the text before and after
    the whole match (group 0) is concatenated and returned.
    """
    whole = match.string
    return whole[:match.start(0)] + whole[match.end(0):]
321
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")

def _split_header_value(text):
    """Consume an optional "=value" from the start of text.

    Returns (value, remaining_text).  A quoted value has its backslash
    escapes removed; an unquoted one is right-stripped.  If text does not
    start with "=value", value is None and text is returned unchanged.
    """
    quoted = HEADER_QUOTED_VALUE_RE.search(text)
    if quoted:  # quoted value
        return HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1)), unmatched(quoted)
    plain = HEADER_VALUE_RE.search(text)
    if plain:  # unquoted value
        return plain.group(1).rstrip(), unmatched(plain)
    # no value, a lone token
    return None, text

def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                value, text = _split_header_value(unmatched(token))
                pairs.append((token.group(1), value))
                continue
            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    result.append(pairs)
                pairs = []
                continue
            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            result.append(pairs)
    return result
410
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    def render(key, value):
        # A value-less key stands alone; anything that is not a plain word
        # is quoted, with embedded " and \ escaped.
        if value is None:
            return key
        if not re.search(r"^\w+$", value):
            value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
        return "%s=%s" % (key, value)

    headers = []
    for pairs in lists:
        attr = [render(k, v) for k, v in pairs]
        if attr:
            headers.append("; ".join(attr))
    return ", ".join(headers)
436
Georg Brandla19baf52010-05-22 11:31:16 +0000437def _strip_quotes(text):
Georg Brandl5d0ca2c2010-05-22 11:29:19 +0000438 if text.startswith('"'):
439 text = text[1:]
440 if text.endswith('"'):
441 text = text[:-1]
442 return text
443
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000444def parse_ns_headers(ns_headers):
445 """Ad-hoc parser for Netscape protocol cookie-attributes.
446
447 The old Netscape cookie format for Set-Cookie can for instance contain
448 an unquoted "," in the expires field, so we have to use this ad-hoc
449 parser instead of split_header_words.
450
451 XXX This may not make the best possible effort to parse all the crap
452 that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
453 parser is probably better, so could do worse than following that if
454 this ever gives any trouble.
455
456 Currently, this is also used for parsing RFC 2109 cookies.
457
458 """
459 known_attrs = ("expires", "domain", "path", "secure",
460 # RFC 2109 attrs (may turn up in Netscape cookies, too)
Georg Brandl5d0ca2c2010-05-22 11:29:19 +0000461 "version", "port", "max-age")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000462
463 result = []
464 for ns_header in ns_headers:
465 pairs = []
466 version_set = False
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200467
468 # XXX: The following does not strictly adhere to RFCs in that empty
469 # names and values are legal (the former will only appear once and will
470 # be overwritten if multiple occurrences are present). This is
471 # mostly to deal with backwards compatibility.
472 for ii, param in enumerate(ns_header.split(';')):
473 param = param.strip()
474
475 key, sep, val = param.partition('=')
476 key = key.strip()
477
478 if not key:
479 if ii == 0:
480 break
481 else:
482 continue
483
484 # allow for a distinction between present and empty and missing
485 # altogether
486 val = val.strip() if sep else None
487
Martin v. Löwis4ea3ead2005-03-03 10:48:12 +0000488 if ii != 0:
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200489 lc = key.lower()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000490 if lc in known_attrs:
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200491 key = lc
492
493 if key == "version":
Neal Norwitz71dad722005-12-23 21:43:48 +0000494 # This is an RFC 2109 cookie.
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200495 if val is not None:
496 val = _strip_quotes(val)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000497 version_set = True
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200498 elif key == "expires":
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000499 # convert expires date to seconds since epoch
Serhiy Storchakaf2496712015-03-13 09:04:34 +0200500 if val is not None:
501 val = http2time(_strip_quotes(val)) # None if invalid
502 pairs.append((key, val))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000503
504 if pairs:
505 if not version_set:
506 pairs.append(("version", "0"))
507 result.append(pairs)
508
509 return result
510
511
# Matches text ending in ".<digits>", taken to mean a dotted IP address.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text) or text == "":
        return False
    # a name may neither start nor end with a dot
    return not (text.startswith(".") or text.endswith("."))
527
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    if not A.endswith(B):
        # A does not have form NB.  B must be a true suffix of A: merely
        # occurring somewhere inside it (www.acme.com.evil.org versus
        # .acme.com) is not a match.  N is non-empty because A != B.
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
566
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  Anything that does not look like a
    dotted IP address (per IPV4_RE) qualifies.

    """
    return not IPV4_RE.search(text)
576
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If B starts with a dot, A matches when it ends with
    B; otherwise (and whenever either looks like an IP address) only an
    exact match counts.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: they match only if equal
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
596
# A trailing ":<digits>" port suffix.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse.urlparse(request.get_full_url())[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", host, 1).lower()
613
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the host contains no
    dot; otherwise the two are the same.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
624
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path is passed through escape_path and always starts with "/".
    """
    parts = urlparse.urlsplit(request.get_full_url())
    path = escape_path(parts.path)
    if path.startswith("/"):
        return path
    # fix bad RFC 2396 absoluteURI
    return "/" + path
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000634
def request_port(request):
    """Return the port of the request's host, as a string.

    DEFAULT_HTTP_PORT is returned when the host carries no ":port" part, and
    None (after a debug message) when the part after the first colon is not
    numeric.
    """
    host = request.get_host()
    _, colon, port = host.partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
648
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: a percent sign followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with upper-case hex digits."""
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    # existing and newly added %-escapes alike get upper-case hex digits
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
670
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: h == a + "." + b whenever a dot is present.
    _, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
705
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
721
722
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted to int unless None, domain is
        lowercased, and rest (the nonstandard cookie-attributes) is
        shallow-copied.  Raises ValueError if port is None while
        port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the nonstandard cookie-attribute name is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the nonstandard cookie-attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the nonstandard cookie-attribute name to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before now.

        now defaults to the current time; a cookie whose expires is None
        never expires.
        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        limit = self.domain
        if self.port is not None:
            limit = limit + ":" + self.port
        limit = limit + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        names = ("version", "name", "value",
                 "port", "port_specified",
                 "domain", "domain_specified", "domain_initial_dot",
                 "path", "path_specified",
                 "secure", "expires", "discard", "comment", "comment_url",
                 )
        args = ["%s=%s" % (name, repr(getattr(self, name))) for name in names]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
819
820
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    set_ok and return_ok must be supplied by subclasses; the domain and path
    pre-checks default to allowing everything.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        Not implemented here: always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        Not implemented here: always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
852
853
854class DefaultCookiePolicy(CookiePolicy):
855 """Implements the standard rules for accepting and returning cookies."""
856
857 DomainStrictNoDots = 1
858 DomainStrictNonDomain = 2
859 DomainRFC2965Match = 4
860
861 DomainLiberal = 0
862 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
863
864 def __init__(self,
865 blocked_domains=None, allowed_domains=None,
866 netscape=True, rfc2965=False,
Neal Norwitz71dad722005-12-23 21:43:48 +0000867 rfc2109_as_netscape=None,
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000868 hide_cookie2=False,
869 strict_domain=False,
870 strict_rfc2965_unverifiable=True,
871 strict_ns_unverifiable=False,
872 strict_ns_domain=DomainLiberal,
873 strict_ns_set_initial_dollar=False,
874 strict_ns_set_path=False,
875 ):
876 """Constructor arguments should be passed as keyword arguments only."""
877 self.netscape = netscape
878 self.rfc2965 = rfc2965
Neal Norwitz71dad722005-12-23 21:43:48 +0000879 self.rfc2109_as_netscape = rfc2109_as_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000880 self.hide_cookie2 = hide_cookie2
881 self.strict_domain = strict_domain
882 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
883 self.strict_ns_unverifiable = strict_ns_unverifiable
884 self.strict_ns_domain = strict_ns_domain
885 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
886 self.strict_ns_set_path = strict_ns_set_path
887
888 if blocked_domains is not None:
889 self._blocked_domains = tuple(blocked_domains)
890 else:
891 self._blocked_domains = ()
892
893 if allowed_domains is not None:
894 allowed_domains = tuple(allowed_domains)
895 self._allowed_domains = allowed_domains
896
897 def blocked_domains(self):
898 """Return the sequence of blocked domains (as a tuple)."""
899 return self._blocked_domains
900 def set_blocked_domains(self, blocked_domains):
901 """Set the sequence of blocked domains."""
902 self._blocked_domains = tuple(blocked_domains)
903
904 def is_blocked(self, domain):
905 for blocked_domain in self._blocked_domains:
906 if user_domain_match(domain, blocked_domain):
907 return True
908 return False
909
910 def allowed_domains(self):
911 """Return None, or the sequence of allowed domains (as a tuple)."""
912 return self._allowed_domains
913 def set_allowed_domains(self, allowed_domains):
914 """Set the sequence of allowed domains, or None."""
915 if allowed_domains is not None:
916 allowed_domains = tuple(allowed_domains)
917 self._allowed_domains = allowed_domains
918
919 def is_not_allowed(self, domain):
920 if self._allowed_domains is None:
921 return False
922 for allowed_domain in self._allowed_domains:
923 if user_domain_match(domain, allowed_domain):
924 return False
925 return True
926
def set_ok(self, cookie, request):
    """Return True if every set_ok_* check accepts *cookie*.

    If you override .set_ok(), be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to accept).

    """
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    assert cookie.name is not None

    # Checks run in this order; the first one that fails rejects the cookie.
    checks = ("version", "verifiability", "name", "path", "domain", "port")
    for suffix in checks:
        check = getattr(self, "set_ok_" + suffix)
        if not check(cookie, request):
            return False
    return True
945
def set_ok_version(self, cookie, request):
    """Check the cookie's version against the enabled protocols.

    Rejects a cookie with no version, a version > 0 cookie when
    self.rfc2965 is false, and a version 0 cookie when self.netscape is
    false.
    """
    version = cookie.version
    if version is None:
        # Version is always set to 0 by parse_ns_headers if it's a Netscape
        # cookie, so this must be an invalid RFC 2965 cookie.
        _debug(" Set-Cookie2 without version attribute (%s=%s)",
               cookie.name, cookie.value)
        return False
    if version > 0:
        if not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0:
        if not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
    return True
960
def set_ok_verifiability(self, cookie, request):
    """Reject third-party cookies on unverifiable requests when strict.

    Only applies when request.is_unverifiable() and is_third_party(request)
    are both true; the strictness flag consulted depends on the cookie
    version.
    """
    if not (request.is_unverifiable() and is_third_party(request)):
        return True
    if cookie.version > 0:
        if self.strict_rfc2965_unverifiable:
            _debug(" third-party RFC 2965 cookie during "
                   "unverifiable transaction")
            return False
    elif cookie.version == 0:
        if self.strict_ns_unverifiable:
            _debug(" third-party Netscape cookie during "
                   "unverifiable transaction")
            return False
    return True
972
def set_ok_name(self, cookie, request):
    """Reject version 0 cookies named '$...' when the policy forbids them."""
    # Try and stop servers setting V0 cookies designed to hack other
    # servers that know both V0 and V1 protocols.
    dollar_names_forbidden = (cookie.version == 0 and
                              self.strict_ns_set_initial_dollar)
    if dollar_names_forbidden and cookie.name.startswith("$"):
        _debug(" illegal name (starts with '$'): '%s'", cookie.name)
        return False
    return True
981
def set_ok_path(self, cookie, request):
    """Check an explicitly specified cookie path against the request path.

    The check is enforced for version > 0 cookies, and for version 0
    cookies only when self.strict_ns_set_path is true.  Cookies without an
    explicit path are always accepted.
    """
    if not cookie.path_specified:
        return True
    req_path = request_path(request)
    if cookie.version > 0:
        enforce = True
    else:
        enforce = cookie.version == 0 and self.strict_ns_set_path
    if enforce and not req_path.startswith(cookie.path):
        _debug(" path attribute %s is not a prefix of request "
               "path %s", cookie.path, req_path)
        return False
    return True
992
def set_ok_domain(self, cookie, request):
    """Check the cookie's domain against the user lists and the request host.

    The block-list and allow-list are always consulted.  The remaining
    checks apply only when the cookie carried an explicit domain
    cookie-attribute.
    """
    if self.is_blocked(cookie.domain):
        _debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        _debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if not cookie.domain_specified:
        return True

    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    if self.strict_domain and domain.count(".") >= 2:
        # XXX This should probably be compared with the Konqueror
        # (kcookiejar.cpp) and Mozilla implementations, but it's a
        # losing battle.
        last_dot = domain.rfind(".")
        prev_dot = domain.rfind(".", 0, last_dot)
        if prev_dot == 0:  # domain like .foo.bar
            tld = domain[last_dot+1:]
            sld = domain[prev_dot+1:last_dot]
            generic_slds = (
                "co", "ac", "com", "edu", "org", "net",
                "gov", "mil", "int", "aero", "biz", "cat", "coop",
                "info", "jobs", "mobi", "museum", "name", "pro",
                "travel", "eu")
            if sld.lower() in generic_slds and len(tld) == 2:
                # domain like .co.uk
                _debug(" country-code second level domain %s", domain)
                return False

    undotted_domain = domain[1:] if domain.startswith(".") else domain
    if "." not in undotted_domain and domain != ".local":
        _debug(" non-local domain %s contains no embedded dot",
               domain)
        return False

    if cookie.version == 0:
        host_matches = (erhn.endswith(domain) or
                        erhn.startswith(".") or
                        ("."+erhn).endswith(domain))
        if not host_matches:
            _debug(" effective request-host %s (even with added "
                   "initial dot) does not end with %s",
                   erhn, domain)
            return False

    if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainRFC2965Match)):
        if not domain_match(erhn, domain):
            _debug(" effective request-host %s does not domain-match "
                   "%s", erhn, domain)
            return False

    if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainStrictNoDots)):
        host_prefix = req_host[:-len(domain)]
        if "." in host_prefix and not IPV4_RE.search(req_host):
            _debug(" host prefix %s for domain %s contains a dot",
                   host_prefix, domain)
            return False

    return True
1051
def set_ok_port(self, cookie, request):
    """Check an explicitly specified port list against the request port.

    Cookies without an explicit port list are always accepted.  Otherwise
    the comma-separated entries are scanned in order: a non-numeric entry
    rejects the cookie, and the first entry equal to the request port
    (taken as "80" when request_port() returns None) accepts it.
    """
    if not cookie.port_specified:
        return True

    req_port = request_port(request)
    req_port = "80" if req_port is None else str(req_port)

    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            _debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            return True

    _debug(" request port (%s) not found in %s",
           req_port, cookie.port)
    return False
1072
def return_ok(self, cookie, request):
    """Return True if every return_ok_* check allows returning *cookie*.

    If you override .return_ok(), be sure to call this method.  If it
    returns false, so should your subclass (assuming your subclass wants to
    be more strict about which cookies to return).

    """
    # Path has already been checked by .path_return_ok(), and domain
    # blocking done by .domain_return_ok().
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    # Checks run in this order; the first one that fails vetoes the cookie.
    checks = ("version", "verifiability", "secure", "expires", "port",
              "domain")
    for suffix in checks:
        check = getattr(self, "return_ok_" + suffix)
        if not check(cookie, request):
            return False
    return True
1090
def return_ok_version(self, cookie, request):
    """Check the cookie's version against the enabled protocols.

    Rejects a version > 0 cookie when self.rfc2965 is false and a
    version 0 cookie when self.netscape is false.
    """
    version = cookie.version
    if version > 0:
        if not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0:
        if not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
    return True
1099
def return_ok_verifiability(self, cookie, request):
    """Withhold third-party cookies on unverifiable requests when strict.

    Only applies when request.is_unverifiable() and is_third_party(request)
    are both true; the strictness flag consulted depends on the cookie
    version.
    """
    if not (request.is_unverifiable() and is_third_party(request)):
        return True
    if cookie.version > 0:
        if self.strict_rfc2965_unverifiable:
            _debug(" third-party RFC 2965 cookie during unverifiable "
                   "transaction")
            return False
    elif cookie.version == 0:
        if self.strict_ns_unverifiable:
            _debug(" third-party Netscape cookie during unverifiable "
                   "transaction")
            return False
    return True
1111
def return_ok_secure(self, cookie, request):
    """Allow a secure cookie only when request.get_type() is "https"."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    _debug(" secure cookie with non-secure request")
    return False
1117
def return_ok_expires(self, cookie, request):
    """Return False if cookie.is_expired(self._now) reports expiry."""
    expired = cookie.is_expired(self._now)
    if expired:
        _debug(" cookie expired")
        return False
    return True
1123
def return_ok_port(self, cookie, request):
    """Check the request port against the cookie's port list, if any.

    A cookie with a false port value is allowed on any port.  Otherwise the
    request port (taken as "80" when request_port() returns None) must
    equal one of the comma-separated entries of cookie.port.
    """
    if not cookie.port:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"

    if req_port in cookie.port.split(","):
        return True

    _debug(" request port %s does not match cookie port %s",
           req_port, cookie.port)
    return False
1137
def return_ok_domain(self, cookie, request):
    """Return True if *cookie*'s domain permits returning it on *request*.

    Version > 0 cookies must domain-match the effective request-host
    name.  Version 0 (Netscape) cookies must have a domain that is a
    suffix of the dot-prefixed effective request-host, compared on a label
    boundary.  When DomainStrictNonDomain is set in self.strict_ns_domain,
    a version 0 cookie without an explicit domain must equal the effective
    request-host exactly.
    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    # Anchor the suffix comparison on a dot so that a host-only domain such
    # as "example.com" is not considered a match for "badexample.com".
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain

    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
        _debug(" cookie with unspecified domain does not string-compare "
               "equal to request domain")
        return False

    if cookie.version > 0 and not domain_match(erhn, domain):
        _debug(" effective request-host name %s does not domain-match "
               "RFC 2965 cookie domain %s", erhn, domain)
        return False
    if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
        _debug(" request-host %s does not match Netscape cookie domain "
               "%s", req_host, domain)
        return False
    return True
1159
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under *domain* can go to *request*.

    The request host and effective request-host are compared with
    *domain* as dot-anchored suffixes, then the user block-list and
    allow-list are consulted.
    """
    # Liberal check of domain.  This is here as an optimization to avoid
    # having to load lots of MSIE cookie files unless necessary.
    req_host, erhn = eff_request_host(request)
    if not req_host.startswith("."):
        req_host = "."+req_host
    if not erhn.startswith("."):
        erhn = "."+erhn

    # Anchor the suffix comparison on a dot so that a host-only domain such
    # as "example.com" is not considered a match for "badexample.com".
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain
    if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
        return False

    if self.is_blocked(domain):
        _debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        _debug(" domain %s is not in user allow-list", domain)
        return False

    return True
1181
def path_return_ok(self, path, request):
    """Return True if the request path path-matches cookie path *path*.

    The request path matches when it equals *path*, or when *path* is a
    prefix of it that ends on a path-segment boundary: either *path* ends
    with "/" or the next character of the request path is "/".  A bare
    prefix test would let cookie path "/foo" match request "/foobar".
    """
    _debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True
    if req_path.startswith(path):
        boundary = len(path)
        if path.endswith("/") or req_path[boundary:boundary+1] == "/":
            return True

    _debug(" %s does not path-match %s", req_path, path)
    return False
1189
1190
def vals_sorted_by_key(adict):
    """Return a list of the values of *adict*, ordered by sorted key."""
    ordered_keys = sorted(adict.keys())
    return [adict.get(key) for key in ordered_keys]
1195
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an ``items`` attribute is treated as a nested
    mapping and descended into; every other value is yielded as a leaf.
    """
    # Snapshot this level's values (in sorted-key order) before yielding
    # anything, exactly as vals_sorted_by_key() does.
    level_values = [mapping.get(key) for key in sorted(mapping.keys())]
    for obj in level_values:
        try:
            obj.items
        except AttributeError:
            is_leaf = True
        else:
            is_leaf = False

        if is_leaf:
            yield obj
        else:
            for leaf in deepvalues(obj):
                yield leaf
1211
1212
1213# Used as second parameter to dict.get() method, to distinguish absent
1214# dict key from one with a None value.
class Absent:
    """Sentinel passed as the default to dict.get() to detect a missing key."""
1216
1217class CookieJar:
1218 """Collection of HTTP cookies.
1219
1220 You may not need to know about this class: try
1221 urllib2.build_opener(HTTPCookieProcessor).open(url).
1222
1223 """
1224
1225 non_word_re = re.compile(r"\W")
1226 quote_re = re.compile(r"([\"\\])")
1227 strict_domain_re = re.compile(r"\.?[^.]*")
1228 domain_re = re.compile(r"[^.]*")
1229 dots_re = re.compile(r"^\.+")
1230
1231 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1232
1233 def __init__(self, policy=None):
1234 if policy is None:
1235 policy = DefaultCookiePolicy()
1236 self._policy = policy
1237
1238 self._cookies_lock = _threading.RLock()
1239 self._cookies = {}
1240
1241 def set_policy(self, policy):
1242 self._policy = policy
1243
1244 def _cookies_for_domain(self, domain, request):
1245 cookies = []
1246 if not self._policy.domain_return_ok(domain, request):
1247 return []
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001248 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001249 cookies_by_path = self._cookies[domain]
1250 for path in cookies_by_path.keys():
1251 if not self._policy.path_return_ok(path, request):
1252 continue
1253 cookies_by_name = cookies_by_path[path]
1254 for cookie in cookies_by_name.values():
1255 if not self._policy.return_ok(cookie, request):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001256 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001257 continue
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001258 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001259 cookies.append(cookie)
1260 return cookies
1261
1262 def _cookies_for_request(self, request):
1263 """Return a list of cookies to be returned to server."""
1264 cookies = []
1265 for domain in self._cookies.keys():
1266 cookies.extend(self._cookies_for_domain(domain, request))
1267 return cookies
1268
1269 def _cookie_attrs(self, cookies):
1270 """Return a list of cookie-attributes to be returned to server.
1271
1272 like ['foo="bar"; $Path="/"', ...]
1273
1274 The $Version attribute is also added when appropriate (currently only
1275 once per request).
1276
1277 """
1278 # add cookies in order of most specific (ie. longest) path first
Brett Cannon52f03c52008-08-03 22:34:25 +00001279 cookies.sort(key=lambda arg: len(arg.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001280
1281 version_set = False
1282
1283 attrs = []
1284 for cookie in cookies:
1285 # set version of Cookie header
1286 # XXX
1287 # What should it be if multiple matching Set-Cookie headers have
1288 # different versions themselves?
1289 # Answer: there is no answer; was supposed to be settled by
1290 # RFC 2965 errata, but that may never appear...
1291 version = cookie.version
1292 if not version_set:
1293 version_set = True
1294 if version > 0:
1295 attrs.append("$Version=%s" % version)
1296
1297 # quote cookie value if necessary
1298 # (not for Netscape protocol, which already has any quotes
1299 # intact, due to the poorly-specified Netscape Cookie: syntax)
1300 if ((cookie.value is not None) and
1301 self.non_word_re.search(cookie.value) and version > 0):
1302 value = self.quote_re.sub(r"\\\1", cookie.value)
1303 else:
1304 value = cookie.value
1305
1306 # add cookie-attributes to be returned in Cookie header
1307 if cookie.value is None:
1308 attrs.append(cookie.name)
1309 else:
1310 attrs.append("%s=%s" % (cookie.name, value))
1311 if version > 0:
1312 if cookie.path_specified:
1313 attrs.append('$Path="%s"' % cookie.path)
1314 if cookie.domain.startswith("."):
1315 domain = cookie.domain
1316 if (not cookie.domain_initial_dot and
1317 domain.startswith(".")):
1318 domain = domain[1:]
1319 attrs.append('$Domain="%s"' % domain)
1320 if cookie.port is not None:
1321 p = "$Port"
1322 if cookie.port_specified:
1323 p = p + ('="%s"' % cookie.port)
1324 attrs.append(p)
1325
1326 return attrs
1327
1328 def add_cookie_header(self, request):
1329 """Add correct Cookie: header to request (urllib2.Request object).
1330
1331 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1332
1333 """
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001334 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001335 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001336 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001337
Tim Petersf733abb2007-01-30 03:03:46 +00001338 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001339
Tim Petersf733abb2007-01-30 03:03:46 +00001340 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001341
Tim Petersf733abb2007-01-30 03:03:46 +00001342 attrs = self._cookie_attrs(cookies)
1343 if attrs:
1344 if not request.has_header("Cookie"):
1345 request.add_unredirected_header(
1346 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001347
Tim Petersf733abb2007-01-30 03:03:46 +00001348 # if necessary, advertise that we know RFC 2965
1349 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1350 not request.has_header("Cookie2")):
1351 for cookie in cookies:
1352 if cookie.version != 1:
1353 request.add_unredirected_header("Cookie2", '$Version="1"')
1354 break
1355
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001356 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001357 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001358
1359 self.clear_expired_cookies()
1360
1361 def _normalized_cookie_tuples(self, attrs_set):
1362 """Return list of tuples containing normalised cookie information.
1363
1364 attrs_set is the list of lists of key,value pairs extracted from
1365 the Set-Cookie or Set-Cookie2 headers.
1366
1367 Tuples are name, value, standard, rest, where name and value are the
1368 cookie name and value, standard is a dictionary containing the standard
1369 cookie-attributes (discard, secure, version, expires or max-age,
1370 domain, path and port) and rest is a dictionary containing the rest of
1371 the cookie-attributes.
1372
1373 """
1374 cookie_tuples = []
1375
1376 boolean_attrs = "discard", "secure"
1377 value_attrs = ("version",
1378 "expires", "max-age",
1379 "domain", "path", "port",
1380 "comment", "commenturl")
1381
1382 for cookie_attrs in attrs_set:
1383 name, value = cookie_attrs[0]
1384
1385 # Build dictionary of standard cookie-attributes (standard) and
1386 # dictionary of other cookie-attributes (rest).
1387
1388 # Note: expiry time is normalised to seconds since epoch. V0
1389 # cookies should have the Expires cookie-attribute, and V1 cookies
1390 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1391 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1392 # accept either (but prefer Max-Age).
1393 max_age_set = False
1394
1395 bad_cookie = False
1396
1397 standard = {}
1398 rest = {}
1399 for k, v in cookie_attrs[1:]:
1400 lc = k.lower()
1401 # don't lose case distinction for unknown fields
1402 if lc in value_attrs or lc in boolean_attrs:
1403 k = lc
1404 if k in boolean_attrs and v is None:
1405 # boolean cookie-attribute is present, but has no value
1406 # (like "discard", rather than "port=80")
1407 v = True
1408 if k in standard:
1409 # only first value is significant
1410 continue
1411 if k == "domain":
1412 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001413 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001414 bad_cookie = True
1415 break
1416 # RFC 2965 section 3.3.3
1417 v = v.lower()
1418 if k == "expires":
1419 if max_age_set:
1420 # Prefer max-age to expires (like Mozilla)
1421 continue
1422 if v is None:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001423 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001424 "attribute: treating as session cookie")
1425 continue
1426 if k == "max-age":
1427 max_age_set = True
1428 try:
1429 v = int(v)
1430 except ValueError:
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001431 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001432 "max-age attribute")
1433 bad_cookie = True
1434 break
1435 # convert RFC 2965 Max-Age to seconds since epoch
1436 # XXX Strictly you're supposed to follow RFC 2616
Serhiy Storchakac72e66a2015-11-02 15:06:09 +02001437 # age-calculation rules. Remember that zero Max-Age
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001438 # is a request to discard (old and new) cookie, though.
1439 k = "expires"
1440 v = self._now + v
1441 if (k in value_attrs) or (k in boolean_attrs):
1442 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001443 k not in ("port", "comment", "commenturl")):
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001444 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001445 bad_cookie = True
1446 break
1447 standard[k] = v
1448 else:
1449 rest[k] = v
1450
1451 if bad_cookie:
1452 continue
1453
1454 cookie_tuples.append((name, value, standard, rest))
1455
1456 return cookie_tuples
1457
1458 def _cookie_from_cookie_tuple(self, tup, request):
1459 # standard is dict of standard cookie-attributes, rest is dict of the
1460 # rest of them
1461 name, value, standard, rest = tup
1462
1463 domain = standard.get("domain", Absent)
1464 path = standard.get("path", Absent)
1465 port = standard.get("port", Absent)
1466 expires = standard.get("expires", Absent)
1467
1468 # set the easy defaults
1469 version = standard.get("version", None)
Georg Brandl5d0ca2c2010-05-22 11:29:19 +00001470 if version is not None:
1471 try:
1472 version = int(version)
1473 except ValueError:
1474 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001475 secure = standard.get("secure", False)
1476 # (discard is also set if expires is Absent)
1477 discard = standard.get("discard", False)
1478 comment = standard.get("comment", None)
1479 comment_url = standard.get("commenturl", None)
1480
1481 # set default path
1482 if path is not Absent and path != "":
1483 path_specified = True
1484 path = escape_path(path)
1485 else:
1486 path_specified = False
1487 path = request_path(request)
1488 i = path.rfind("/")
1489 if i != -1:
1490 if version == 0:
1491 # Netscape spec parts company from reality here
1492 path = path[:i]
1493 else:
1494 path = path[:i+1]
1495 if len(path) == 0: path = "/"
1496
1497 # set default domain
1498 domain_specified = domain is not Absent
1499 # but first we have to remember whether it starts with a dot
1500 domain_initial_dot = False
1501 if domain_specified:
1502 domain_initial_dot = bool(domain.startswith("."))
1503 if domain is Absent:
1504 req_host, erhn = eff_request_host(request)
1505 domain = erhn
1506 elif not domain.startswith("."):
1507 domain = "."+domain
1508
1509 # set default port
1510 port_specified = False
1511 if port is not Absent:
1512 if port is None:
1513 # Port attr present, but has no value: default to request port.
1514 # Cookie should then only be sent back on that port.
1515 port = request_port(request)
1516 else:
1517 port_specified = True
1518 port = re.sub(r"\s+", "", port)
1519 else:
1520 # No port attr present. Cookie can be sent back on any port.
1521 port = None
1522
1523 # set default expires and discard
1524 if expires is Absent:
1525 expires = None
1526 discard = True
1527 elif expires <= self._now:
1528 # Expiry date in past is request to delete cookie. This can't be
1529 # in DefaultCookiePolicy, because can't delete cookies there.
1530 try:
1531 self.clear(domain, path, name)
1532 except KeyError:
1533 pass
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001534 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1535 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001536 return None
1537
1538 return Cookie(version,
1539 name, value,
1540 port, port_specified,
1541 domain, domain_specified, domain_initial_dot,
1542 path, path_specified,
1543 secure,
1544 expires,
1545 discard,
1546 comment,
1547 comment_url,
1548 rest)
1549
1550 def _cookies_from_attrs_set(self, attrs_set, request):
1551 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1552
1553 cookies = []
1554 for tup in cookie_tuples:
1555 cookie = self._cookie_from_cookie_tuple(tup, request)
1556 if cookie: cookies.append(cookie)
1557 return cookies
1558
Neal Norwitz71dad722005-12-23 21:43:48 +00001559 def _process_rfc2109_cookies(self, cookies):
1560 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1561 if rfc2109_as_ns is None:
1562 rfc2109_as_ns = not self._policy.rfc2965
1563 for cookie in cookies:
1564 if cookie.version == 1:
1565 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001566 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001567 # treat 2109 cookies as Netscape cookies rather than
1568 # as RFC2965 cookies
1569 cookie.version = 0
1570
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001571 def make_cookies(self, response, request):
1572 """Return sequence of Cookie objects extracted from response object."""
1573 # get cookie-attributes for RFC 2965 and Netscape protocols
1574 headers = response.info()
1575 rfc2965_hdrs = headers.getheaders("Set-Cookie2")
1576 ns_hdrs = headers.getheaders("Set-Cookie")
1577
1578 rfc2965 = self._policy.rfc2965
1579 netscape = self._policy.netscape
1580
1581 if ((not rfc2965_hdrs and not ns_hdrs) or
1582 (not ns_hdrs and not rfc2965) or
1583 (not rfc2965_hdrs and not netscape) or
1584 (not netscape and not rfc2965)):
1585 return [] # no relevant cookie headers: quick exit
1586
1587 try:
1588 cookies = self._cookies_from_attrs_set(
1589 split_header_words(rfc2965_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001590 except Exception:
1591 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001592 cookies = []
1593
1594 if ns_hdrs and netscape:
1595 try:
Neal Norwitz71dad722005-12-23 21:43:48 +00001596 # RFC 2109 and Netscape cookies
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001597 ns_cookies = self._cookies_from_attrs_set(
1598 parse_ns_headers(ns_hdrs), request)
Georg Brandle854e762006-05-08 17:48:01 +00001599 except Exception:
1600 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001601 ns_cookies = []
Neal Norwitz71dad722005-12-23 21:43:48 +00001602 self._process_rfc2109_cookies(ns_cookies)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001603
1604 # Look for Netscape cookies (from Set-Cookie headers) that match
1605 # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1606 # For each match, keep the RFC 2965 cookie and ignore the Netscape
1607 # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1608 # bundled in with the Netscape cookies for this purpose, which is
1609 # reasonable behaviour.
1610 if rfc2965:
1611 lookup = {}
1612 for cookie in cookies:
1613 lookup[(cookie.domain, cookie.path, cookie.name)] = None
1614
1615 def no_matching_rfc2965(ns_cookie, lookup=lookup):
1616 key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1617 return key not in lookup
1618 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1619
1620 if ns_cookies:
1621 cookies.extend(ns_cookies)
1622
1623 return cookies
1624
1625 def set_cookie_if_ok(self, cookie, request):
1626 """Set a cookie if policy says it's OK to do so."""
1627 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001628 try:
1629 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001630
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001631 if self._policy.set_ok(cookie, request):
1632 self.set_cookie(cookie)
Tim Petersf733abb2007-01-30 03:03:46 +00001633
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001634
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001635 finally:
1636 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001637
1638 def set_cookie(self, cookie):
1639 """Set a cookie, without checking whether or not it should be set."""
1640 c = self._cookies
1641 self._cookies_lock.acquire()
1642 try:
1643 if cookie.domain not in c: c[cookie.domain] = {}
1644 c2 = c[cookie.domain]
1645 if cookie.path not in c2: c2[cookie.path] = {}
1646 c3 = c2[cookie.path]
1647 c3[cookie.name] = cookie
1648 finally:
1649 self._cookies_lock.release()
1650
1651 def extract_cookies(self, response, request):
1652 """Extract cookies from response, where allowable given the request."""
Georg Brandlfeb0a3b2006-05-17 14:45:06 +00001653 _debug("extract_cookies: %s", response.info())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001654 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001655 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001656 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001657
Tim Petersf733abb2007-01-30 03:03:46 +00001658 for cookie in self.make_cookies(response, request):
1659 if self._policy.set_ok(cookie, request):
1660 _debug(" setting cookie: %s", cookie)
1661 self.set_cookie(cookie)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001662 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001663 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001664
1665 def clear(self, domain=None, path=None, name=None):
1666 """Clear some cookies.
1667
1668 Invoking this method without arguments will clear all cookies. If
1669 given a single argument, only cookies belonging to that domain will be
1670 removed. If given two arguments, cookies belonging to the specified
1671 path within that domain are removed. If given three arguments, then
1672 the cookie with the specified name, path and domain is removed.
1673
1674 Raises KeyError if no matching cookie exists.
1675
1676 """
1677 if name is not None:
1678 if (domain is None) or (path is None):
1679 raise ValueError(
1680 "domain and path must be given to remove a cookie by name")
1681 del self._cookies[domain][path][name]
1682 elif path is not None:
1683 if domain is None:
1684 raise ValueError(
1685 "domain must be given to remove cookies by path")
1686 del self._cookies[domain][path]
1687 elif domain is not None:
1688 del self._cookies[domain]
1689 else:
1690 self._cookies = {}
1691
1692 def clear_session_cookies(self):
1693 """Discard all session cookies.
1694
1695 Note that the .save() method won't save session cookies anyway, unless
1696 you ask otherwise by passing a true ignore_discard argument.
1697
1698 """
1699 self._cookies_lock.acquire()
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001700 try:
Tim Petersf733abb2007-01-30 03:03:46 +00001701 for cookie in self:
1702 if cookie.discard:
1703 self.clear(cookie.domain, cookie.path, cookie.name)
Andrew M. Kuchling29ff4612006-12-19 15:43:10 +00001704 finally:
Tim Petersf733abb2007-01-30 03:03:46 +00001705 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001706
def clear_expired_cookies(self):
    """Discard all expired cookies.

    Each cookie is asked, via is_expired(), whether it has expired as of
    a single time.time() reading; those that have are removed through
    self.clear(), while holding the jar's cookie lock.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    self._cookies_lock.acquire()
    try:
        now = time.time()
        # Take a snapshot before removing anything: self.clear() mutates
        # the underlying mappings, and deleting entries while an iterator
        # over them is still live is only safe if that iterator happens
        # to copy what it walks.
        for cookie in list(self):
            if cookie.is_expired(now):
                self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001725
def __iter__(self):
    """Iterate over the cookies held in the jar.

    Delegates to deepvalues() to walk the nested self._cookies mapping.

    """
    cookie_values = deepvalues(self._cookies)
    return cookie_values
1728
def __len__(self):
    """Return number of contained cookies."""
    # Count by exhausting the jar's own iterator.
    return sum(1 for _cookie in self)
1734
def __repr__(self):
    """Return "<ClassName[...]>" listing the repr() of each cookie."""
    listing = ", ".join([repr(cookie) for cookie in self])
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001739
def __str__(self):
    """Return "<ClassName[...]>" listing the str() of each cookie."""
    listing = ", ".join(str(cookie) for cookie in self)
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001744
1745
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Exception for failures while loading cookies from a file."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001748
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    save() is left unimplemented here, and load() relies on a
    _really_load() method that this class does not define.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """Set up the jar without reading anything from disk.

        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename -- default file name used by load() and revert() when
            they are called without one; must be string-like
        delayload -- stored on the instance as a bool
        policy -- passed through to CookieJar.__init__

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-type check: anything that can be concatenated with a
            # string is accepted.
            try:
                filename+""
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate rather than be reported as a bad filename.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented in this class; always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename.  The file is opened, handed to
        self._really_load() together with the filename and the two ignore_*
        flags, and closed again whether or not loading succeeds.

        Raises ValueError if no filename is given and self.filename is None.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        filename defaults to self.filename.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.  Raises
        ValueError if no filename is given and self.filename is None.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            # Keep a copy of the current cookies so they can be put back
            # if loading fails.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001808
1809from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
1810from _MozillaCookieJar import MozillaCookieJar