blob: db82382357181332129cd432a551e4fda403d797 [file] [log] [blame]
r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27
Thomas Wouters477c8d52006-05-27 19:21:47 +000028__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
29 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
30
Stéphane Wirtel4b219ce2019-03-01 21:40:54 +010031import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000032import copy
Victor Stinner628225c2011-03-21 02:38:51 +010033import datetime
Jeremy Hylton1afc1692008-06-18 20:49:58 +000034import re
35import time
36import urllib.parse, urllib.request
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020037import threading as _threading
Georg Brandl24420152008-05-26 16:32:26 +000038import http.client # only for the default HTTP port
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000039from calendar import timegm
40
# Set to True to enable debugging output via the logging module.
debug = False
# Logger used by _debug(); created lazily the first time it is needed.
logger = None


def _debug(*args):
    """Forward *args* to the "http.cookiejar" logger's debug() method.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  Otherwise the module-level ``logger`` is created on first use
    and the result of ``logger.debug(*args)`` is returned.
    """
    global logger
    if debug:
        if not logger:
            # Imported here so logging is only loaded when debugging is on.
            import logging
            logger = logging.getLogger("http.cookiejar")
        return logger.debug(*args)
    return None
52
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000053
# The default HTTP port, as a string.
DEFAULT_HTTP_PORT = "%s" % (http.client.HTTP_PORT,)
# Error text for operations that need a filename when none is available.
MISSING_FILENAME_TEXT = (
    "a filename was not supplied "
    "(nor was the CookieJar instance initialised with one)"
)
57
def _warn_unhandled_exception():
    """Issue a warning containing the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  This helper is meant to
    be called from those handlers so that such exceptions do not go
    unnoticed.  The warning is attributed to the caller (stacklevel=2).
    """
    import traceback
    import warnings

    details = traceback.format_exc()
    warnings.warn("http.cookiejar bug!\n%s" % details, stacklevel=2)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000067
68
69# Date/time conversion
70# -----------------------------------------------------------------------------
71
EPOCH_YEAR = 1970


def _timegm(tt):
    """Range-checked wrapper around calendar.timegm().

    *tt* is a UTC time tuple; only its first six items (year, month, day,
    hour, minute, second) are examined.  Returns None when any field lies
    outside the accepted range, otherwise the value of ``timegm(tt)``.
    """
    year, month, mday, hour, minute, sec = tt[:6]
    # Checks run in field order and stop at the first failure.
    if not (year >= EPOCH_YEAR):
        return None
    if not (1 <= month <= 12):
        return None
    if not (1 <= mday <= 31):
        return None
    if not (0 <= hour <= 24):
        return None
    if not (0 <= minute <= 59):
        return None
    if not (0 <= sec <= 61):
        return None
    return timegm(tt)
80
# Weekday abbreviations, indexed by datetime.weekday() (Monday is 0).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Month abbreviations; index 0 is January.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS for case-insensitive lookups.  Built with a
# comprehension so that no loop variable is left behind as a module global.
MONTHS_LOWER = [name.lower() for name in MONTHS]
86
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Timezone-aware UTC datetimes are used instead of the deprecated
    # datetime.utcnow() / datetime.utcfromtimestamp(); the formatted fields
    # are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000105
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Timezone-aware UTC datetimes are used instead of the deprecated
    # datetime.utcnow() / datetime.utcfromtimestamp(); the formatted fields
    # are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    # Day and month names come from the module's own English tables rather
    # than from strftime().
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000124
125
# Timezone names that are known to mean a zero offset from UTC.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric offsets: optional sign, one or two hour digits, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)


def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by the string *tz*.

    Names listed in UTC_ZONES give 0.  Numeric forms such as "+0100",
    "-08:00" or "5" are converted to seconds, negated when the sign is "-".
    Anything else yields None.
    """
    if tz in UTC_ZONES:
        return 0
    parsed = TIMEZONE_RE.search(tz)
    if not parsed:
        return None
    sign, hours, minutes = parsed.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds = seconds + 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
142
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert date/time components to seconds since the epoch.

    The arguments are the pieces captured by the date regular expressions
    in this module: strings, or None for the optional clock and timezone
    parts.  *mon* may be a month name abbreviation (any case) or a month
    number.  Missing clock fields default to 0 and a missing timezone is
    treated as UTC.

    Returns an integer, or None if the year exceeds datetime.MAXYEAR, the
    month is not recognised, a field is out of range (see _timegm) or the
    timezone string is not understood.
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE lets seconds carry a fractional part (e.g. "29.25"), which
    # int() alone rejects with ValueError.  Go through float() and truncate
    # so that such dates are parsed instead of raising.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the date closest
        # to the current year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec))
    if t is None:
        return None

    # adjust time using timezone string, to get absolute time since epoch
    if tz is None:
        tz = "UTC"
    offset = offset_from_tz_string(tz.upper())
    if offset is None:
        return None
    return t - offset
198
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X | re.ASCII)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        day, mon_name, yr, hr, minute, sec = m.groups()
        try:
            mon = MONTHS_LOWER.index(mon_name.lower()) + 1
        except ValueError:
            # STRICT_DATE_RE only checks the *shape* of the month ("Foo"
            # matches); an unknown name is an unrecognized date, which is
            # reported as None rather than by raising.
            return None
        # Seconds are converted with int() so this path returns an integer,
        # as documented and as the loose path below does.
        tt = (int(yr), mon, int(day), int(hr), int(minute), int(sec))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, minute, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)
276
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X | re.ASCII)


def iso2time(text):
    """Parse an ISO 8601 style date string into seconds since the epoch.

    Behaves as http2time does, but for these formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Leading whitespace is ignored.  None is returned when the text does not
    match ISO_DATE_RE.
    """
    parsed = ISO_DATE_RE.search(text.lstrip())
    if parsed is None:
        return None  # bad format

    # The eighth group is nested inside the timezone group and is ignored.
    # XXX is ignoring it the right thing to do?
    year, month, day, hour, minute, second, zone, _ = parsed.groups()
    return _str2time(day, month, year, hour, minute, second, zone)
321
322
323# Header parsing
324# -----------------------------------------------------------------------------
325
def unmatched(match):
    """Return the searched string with the span matched by *match* removed."""
    subject = match.string
    before = subject[:match.start(0)]
    after = subject[match.end(0):]
    return before + after
330
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")


def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    *header_values* is an iterable of header value strings (passing a single
    str is rejected by an assertion).  Each string is consumed left to
    right: a token, optionally followed by "=" and a quoted or unquoted
    value, produces one (key, value) pair; ";" and whitespace separate
    pairs; "," starts a new list of pairs.  Multiple strings are treated as
    if they had been joined with ",".

    This makes the function useful for header fields following this syntax
    (BNF as from the HTTP/1.1 specification, with the requirement for
    tokens relaxed):

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    A lone token (one not followed by "=") gets the value None.  Backslash
    escapes inside quoted values are removed.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    Examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    parsed = []
    for orig_text in header_values:
        text = orig_text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                text = unmatched(token)
                name = token.group(1)
                quoted = HEADER_QUOTED_VALUE_RE.search(text)
                if quoted:
                    # quoted value: drop the backslash of each escape
                    text = unmatched(quoted)
                    value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
                else:
                    bare = HEADER_VALUE_RE.search(text)
                    if bare:
                        # unquoted value
                        text = unmatched(bare)
                        value = bare.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    parsed.append(pairs)
                pairs = []
                continue

            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            parsed.append(pairs)
    return parsed
419
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")


def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    *lists* is a list of lists of (key, value) pairs.  The pairs of each
    inner list are joined with "; " and the resulting groups with ", ".  A
    value of None emits the bare key.  Any other value is written as
    key=value, and is double-quoted (with embedded quotes and backslashes
    escaped) unless it consists solely of word characters.  Empty inner
    lists contribute nothing.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    groups = []
    for pairs in lists:
        items = []
        for key, value in pairs:
            if value is None:
                items.append(key)
                continue
            if not re.search(r"^\w+$", value):
                # escape " and \ before wrapping the value in quotes
                escaped = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
                value = '"%s"' % escaped
            items.append("%s=%s" % (key, value))
        if items:
            groups.append("; ".join(items))
    return ", ".join(groups)
445
def strip_quotes(text):
    """Return *text* without a leading and/or trailing double quote.

    The two ends are handled independently, and in that order: a leading
    quote is removed first, then a trailing quote is removed from what
    remains.
    """
    without_lead = text[1:] if text.startswith('"') else text
    if without_lead.endswith('"'):
        return without_lead[:-1]
    return without_lead
452
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so this ad-hoc parser is used
    instead of split_header_words.  It is currently also used for parsing
    RFC 2109 cookies.

    *ns_headers* is an iterable of header value strings.  Each is split on
    ";" into "key=value" parameters.  The result is a list with, for every
    header that produced at least one pair, a list of (key, value) tuples:

    - value is None when the parameter had no "=" at all, and "" when it
      had one with nothing after it;
    - for every parameter but the first, a key matching a known
      cookie-attribute (case-insensitively) is lowercased;
    - a "version" value has surrounding quotes stripped, and an "expires"
      value is converted with http2time (None if invalid);
    - ("version", "0") is appended when no version attribute was seen.

    A header whose first parameter has an empty key is dropped entirely;
    later parameters with an empty key are skipped.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    parsed_headers = []
    for header in ns_headers:
        attributes = []
        saw_version = False

        # XXX: The following does not strictly adhere to RFCs in that empty
        # names and values are legal (the former will only appear once and will
        # be overwritten if multiple occurrences are present). This is
        # mostly to deal with backwards compatibility.
        for index, chunk in enumerate(header.split(';')):
            name, equals, raw_value = chunk.strip().partition('=')
            name = name.strip()

            if not name:
                if index == 0:
                    break
                continue

            # allow for a distinction between present and empty and missing
            # altogether
            value = raw_value.strip() if equals else None

            # The first parameter is the cookie's own name=value and is
            # taken verbatim; only later ones are cookie-attributes.
            if index != 0:
                lowered = name.lower()
                if lowered in known_attrs:
                    name = lowered

                if name == "version":
                    # This is an RFC 2109 cookie.
                    if value is not None:
                        value = strip_quotes(value)
                    saw_version = True
                elif name == "expires":
                    # convert expires date to seconds since epoch
                    if value is not None:
                        value = http2time(strip_quotes(value))  # None if invalid
            attributes.append((name, value))

        if attributes:
            if not saw_version:
                attributes.append(("version", "0"))
            parsed_headers.append(attributes)

    return parsed_headers
519
520
# Matches text that ends in a dot followed by digits, which is how this
# module recognises an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)


def is_HDN(text):
    """Return True if text is a host domain name.

    The text is rejected when it looks like an IPv4 address (IPV4_RE), is
    empty, or starts or ends with a dot.
    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text) or text == "":
        return False
    return not (text.startswith(".") or text.endswith("."))
536
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means that A *ends with* B and that something
    # non-empty precedes it.  Merely finding B somewhere inside A is not
    # enough: "www.acme.com.evil.org" must not match ".acme.com".
    if len(A) <= len(B) or not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
575
def liberal_is_HDN(text):
    """Return True if text is sort-of-like a host domain name.

    Used when accepting/blocking domains.  The only thing rejected is text
    that looks like an IPv4 address (IPV4_RE).
    """
    looks_like_ip = IPV4_RE.search(text)
    return not looks_like_ip
585
def user_domain_match(A, B):
    """Domain comparison used for blocking/accepting domains.

    A and B may be host domain names or IP addresses; both are compared
    case-insensitively.  If either one looks like an IP address, only exact
    equality matches.  Otherwise, when B starts with a dot, A matches if it
    ends with B; when it does not, A must equal B.
    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: only an identical string matches
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
605
# A trailing ":<digits>" port suffix.
cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of the request's full
    URL, falling back to the request's "Host" header (or "") when the URL
    has none.  A trailing numeric port is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urllib.parse.urlparse(request.get_full_url()).netloc
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    without_port = cut_port_re.sub("", netloc, 1)
    return without_port.lower()
622
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot (and does not look like an IP address); otherwise the
    two are identical.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
633
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path of the request's full URL is passed through escape_path, and a
    leading "/" is added if it is missing.
    """
    split_url = urllib.parse.urlsplit(request.get_full_url())
    escaped = escape_path(split_url.path)
    if escaped.startswith("/"):
        return escaped
    # fix bad RFC 2396 absoluteURI
    return "/" + escaped
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000643
def request_port(request):
    """Return the port of the request's host, as a string.

    Everything after the first ":" in ``request.host`` is taken to be the
    port.  DEFAULT_HTTP_PORT is returned when there is no ":", and None
    (after a debug message) when the port text is not an integer.
    """
    _, colon, port = request.host.partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
657
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Substitution callback: return the matched %xx escape with upper-case
    hex digits."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()


def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    quoted = urllib.parse.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
677
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split H at its first dot into A (unused) and B.
    _, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
712
def is_third_party(request):
    """Return True if the request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    The origin request-host is read from ``request.origin_req_host``.

    """
    req_host = request_host(request)
    origin_reach = reach(request.origin_req_host)
    return not domain_match(req_host, origin_reach)
728
729
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted to int when not None (expires may
        be given as a float or numeric string).  domain is lowercased.  rest
        holds the non-standard cookie-attributes; a shallow copy is kept.

        Raises ValueError if port is None while port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(float(expires))
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether the non-standard cookie-attribute *name* is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute *name*, or *default*."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute *name* to *value*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= *now*.

        *now* defaults to the current time.  A cookie whose expires is None
        never counts as expired.
        """
        if now is None:
            now = time.time()
        return bool(self.expires is not None and self.expires <= now)

    def __str__(self):
        port_part = "" if self.port is None else ":" + self.port
        scope = self.domain + port_part + self.path
        if self.value is None:
            label = self.name
        else:
            label = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (label, scope)

    def __repr__(self):
        # The public attributes, in constructor order; rest and rfc2109
        # follow separately.
        attr_names = ("version", "name", "value",
                      "port", "port_specified",
                      "domain", "domain_specified", "domain_initial_dot",
                      "path", "path_specified",
                      "secure", "expires", "discard", "comment", "comment_url",
                      )
        parts = ["%s=%r" % (attr, getattr(self, attr)) for attr in attr_names]
        parts.append("rest=%r" % (self._rest,))
        parts.append("rfc2109=%r" % (self.rfc2109,))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(parts))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000826
827
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    set_ok and return_ok must be provided by subclasses; the domain and
    path pre-checks default to allowing everything.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customized policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        """
        raise NotImplementedError

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
859
860
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    Acceptance (set_ok) and return (return_ok) are each split into a series
    of set_ok_<aspect> / return_ok_<aspect> methods that are looked up by
    name, so a subclass can tighten one aspect by overriding one method.

    The Domain* class constants are bit flags for the strict_ns_domain
    constructor argument; they select extra domain checks for Netscape
    (version 0) cookies.

    """

    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 secure_protocols=("https", "wss")
                 ):
        """Constructor arguments should be passed as keyword arguments only.

        blocked_domains and allowed_domains are stored as tuples;
        allowed_domains may stay None, in which case is_not_allowed() never
        rejects a domain.  Every other argument is stored unchanged on the
        attribute of the same name.

        """
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path
        self.secure_protocols = secure_protocols

        if blocked_domains is None:
            self._blocked_domains = ()
        else:
            self._blocked_domains = tuple(blocked_domains)

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains

    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry of the block-list."""
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains

    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches none of it."""
        if self._allowed_domains is None:
            # No allow-list configured: nothing is excluded by it.
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        Runs the set_ok_<aspect> checks in a fixed order and stops at the
        first one that returns false.

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        for n in "version", "verifiability", "name", "path", "domain", "port":
            check = getattr(self, "set_ok_"+n)
            if not check(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies without a version or of a switched-off protocol."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Apply the strict_*_unverifiable settings to third-party requests."""
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Optionally reject version 0 cookies whose name starts with '$'."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Check an explicitly specified cookie path against the request path.

        The check applies to cookies with version > 0, and to version 0
        cookies only when strict_ns_set_path is set.

        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not self.path_return_ok(cookie.path, request)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Check the cookie domain against block/allow lists and the request.

        The block-list and allow-list always apply.  The remaining checks
        only run when the cookie carried an explicit domain attribute.

        """
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug(" country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug(" non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                # Accept when the effective request-host ends with the
                # domain, either as-is or with an initial dot added.
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug(" effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug(" effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                # What is left of the request host once the domain suffix is
                # cut off must not itself contain a dot (IPv4 hosts exempt).
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug(" host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """Check an explicit port list: numeric entries, one equal to ours.

        Entries are validated in order; checking stops at the first entry
        equal to the request port.

        """
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        Runs the return_ok_<aspect> checks in a fixed order and stops at the
        first one that returns false.

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            check = getattr(self, "return_ok_"+n)
            if not check(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse to return cookies of a switched-off protocol."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Apply the strict_*_unverifiable settings to third-party requests."""
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Return secure cookies only for request types in secure_protocols."""
        if cookie.secure and request.type not in self.secure_protocols:
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse cookies that are expired as of self._now.

        self._now is not set by this class; the CookieJar assigns it on the
        policy before asking it about cookies.

        """
        if cookie.is_expired(self._now):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Return the cookie only if the request port is in its port list."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                # Normalise exactly as set_ok_port() does, so the comparison
                # against the string tokens of cookie.port is like-for-like.
                req_port = str(req_port)
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check that the request host is acceptable for the cookie's domain."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # Dotted form of the domain, used for the Netscape suffix comparison.
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Cheap pre-check on a cookie domain, before any cookie is examined.

        Returns False when neither the request host nor the effective request
        host ends with the (dotted) domain, or when the domain is excluded by
        the block-list or allow-list.

        """
        # Liberal check of the domain.  This is here as an optimization to
        # avoid having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain
        if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
            #_debug(" request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return True if the request path path-matches the cookie path.

        A match is either equality, or the cookie path being a prefix of the
        request path that ends at a "/" boundary.

        """
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        pathlen = len(path)
        if req_path == path:
            return True
        elif (req_path.startswith(path) and
              (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
            return True

        _debug(" %s does not path-match %s", req_path, path)
        return False
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001212
def vals_sorted_by_key(adict):
    """Return an iterator over adict's values, ordered by their sorted keys."""
    return map(adict.get, sorted(adict.keys()))
1216
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an ``items`` attribute is treated as a nested mapping
    and descended into; every other value is yielded as a leaf.

    """
    for key in sorted(mapping.keys()):
        obj = mapping.get(key)
        if hasattr(obj, "items"):
            yield from deepvalues(obj)
        else:
            yield obj
1231
1232
# Sentinel passed as the default (second) argument of dict.get(), so that a
# missing key can be told apart from a key that is present with value None.
class Absent:
    """Marker class meaning "no such key"; used by identity, never instantiated here."""
1236
1237class CookieJar:
1238 """Collection of HTTP cookies.
1239
1240 You may not need to know about this class: try
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001241 urllib.request.build_opener(HTTPCookieProcessor).open(url).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001242 """
1243
1244 non_word_re = re.compile(r"\W")
1245 quote_re = re.compile(r"([\"\\])")
1246 strict_domain_re = re.compile(r"\.?[^.]*")
1247 domain_re = re.compile(r"[^.]*")
1248 dots_re = re.compile(r"^\.+")
1249
Antoine Pitroufd036452008-08-19 17:56:33 +00001250 magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001251
1252 def __init__(self, policy=None):
1253 if policy is None:
1254 policy = DefaultCookiePolicy()
1255 self._policy = policy
1256
1257 self._cookies_lock = _threading.RLock()
1258 self._cookies = {}
1259
1260 def set_policy(self, policy):
1261 self._policy = policy
1262
1263 def _cookies_for_domain(self, domain, request):
1264 cookies = []
1265 if not self._policy.domain_return_ok(domain, request):
1266 return []
Thomas Wouters477c8d52006-05-27 19:21:47 +00001267 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001268 cookies_by_path = self._cookies[domain]
1269 for path in cookies_by_path.keys():
1270 if not self._policy.path_return_ok(path, request):
1271 continue
1272 cookies_by_name = cookies_by_path[path]
1273 for cookie in cookies_by_name.values():
1274 if not self._policy.return_ok(cookie, request):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001275 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001276 continue
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001278 cookies.append(cookie)
1279 return cookies
1280
1281 def _cookies_for_request(self, request):
1282 """Return a list of cookies to be returned to server."""
1283 cookies = []
1284 for domain in self._cookies.keys():
1285 cookies.extend(self._cookies_for_domain(domain, request))
1286 return cookies
1287
1288 def _cookie_attrs(self, cookies):
1289 """Return a list of cookie-attributes to be returned to server.
1290
1291 like ['foo="bar"; $Path="/"', ...]
1292
1293 The $Version attribute is also added when appropriate (currently only
1294 once per request).
1295
1296 """
1297 # add cookies in order of most specific (ie. longest) path first
Raymond Hettinger70b64fc2008-01-30 20:15:17 +00001298 cookies.sort(key=lambda a: len(a.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001299
1300 version_set = False
1301
1302 attrs = []
1303 for cookie in cookies:
1304 # set version of Cookie header
1305 # XXX
1306 # What should it be if multiple matching Set-Cookie headers have
1307 # different versions themselves?
1308 # Answer: there is no answer; was supposed to be settled by
1309 # RFC 2965 errata, but that may never appear...
1310 version = cookie.version
1311 if not version_set:
1312 version_set = True
1313 if version > 0:
1314 attrs.append("$Version=%s" % version)
1315
1316 # quote cookie value if necessary
1317 # (not for Netscape protocol, which already has any quotes
1318 # intact, due to the poorly-specified Netscape Cookie: syntax)
1319 if ((cookie.value is not None) and
1320 self.non_word_re.search(cookie.value) and version > 0):
1321 value = self.quote_re.sub(r"\\\1", cookie.value)
1322 else:
1323 value = cookie.value
1324
1325 # add cookie-attributes to be returned in Cookie header
1326 if cookie.value is None:
1327 attrs.append(cookie.name)
1328 else:
1329 attrs.append("%s=%s" % (cookie.name, value))
1330 if version > 0:
1331 if cookie.path_specified:
1332 attrs.append('$Path="%s"' % cookie.path)
1333 if cookie.domain.startswith("."):
1334 domain = cookie.domain
1335 if (not cookie.domain_initial_dot and
1336 domain.startswith(".")):
1337 domain = domain[1:]
1338 attrs.append('$Domain="%s"' % domain)
1339 if cookie.port is not None:
1340 p = "$Port"
1341 if cookie.port_specified:
1342 p = p + ('="%s"' % cookie.port)
1343 attrs.append(p)
1344
1345 return attrs
1346
1347 def add_cookie_header(self, request):
Georg Brandl029986a2008-06-23 11:44:14 +00001348 """Add correct Cookie: header to request (urllib.request.Request object).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001349
1350 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1351
1352 """
Thomas Wouters477c8d52006-05-27 19:21:47 +00001353 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001354 self._cookies_lock.acquire()
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001355 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001356
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001357 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001358
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001359 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001360
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001361 attrs = self._cookie_attrs(cookies)
1362 if attrs:
1363 if not request.has_header("Cookie"):
1364 request.add_unredirected_header(
1365 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001366
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001367 # if necessary, advertise that we know RFC 2965
1368 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1369 not request.has_header("Cookie2")):
1370 for cookie in cookies:
1371 if cookie.version != 1:
1372 request.add_unredirected_header("Cookie2", '$Version="1"')
1373 break
1374
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001375 finally:
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001376 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001377
1378 self.clear_expired_cookies()
1379
1380 def _normalized_cookie_tuples(self, attrs_set):
1381 """Return list of tuples containing normalised cookie information.
1382
1383 attrs_set is the list of lists of key,value pairs extracted from
1384 the Set-Cookie or Set-Cookie2 headers.
1385
1386 Tuples are name, value, standard, rest, where name and value are the
1387 cookie name and value, standard is a dictionary containing the standard
1388 cookie-attributes (discard, secure, version, expires or max-age,
1389 domain, path and port) and rest is a dictionary containing the rest of
1390 the cookie-attributes.
1391
1392 """
1393 cookie_tuples = []
1394
1395 boolean_attrs = "discard", "secure"
1396 value_attrs = ("version",
1397 "expires", "max-age",
1398 "domain", "path", "port",
1399 "comment", "commenturl")
1400
1401 for cookie_attrs in attrs_set:
1402 name, value = cookie_attrs[0]
1403
1404 # Build dictionary of standard cookie-attributes (standard) and
1405 # dictionary of other cookie-attributes (rest).
1406
1407 # Note: expiry time is normalised to seconds since epoch. V0
1408 # cookies should have the Expires cookie-attribute, and V1 cookies
1409 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1410 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1411 # accept either (but prefer Max-Age).
1412 max_age_set = False
1413
1414 bad_cookie = False
1415
1416 standard = {}
1417 rest = {}
1418 for k, v in cookie_attrs[1:]:
1419 lc = k.lower()
1420 # don't lose case distinction for unknown fields
1421 if lc in value_attrs or lc in boolean_attrs:
1422 k = lc
1423 if k in boolean_attrs and v is None:
1424 # boolean cookie-attribute is present, but has no value
1425 # (like "discard", rather than "port=80")
1426 v = True
1427 if k in standard:
1428 # only first value is significant
1429 continue
1430 if k == "domain":
1431 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001432 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001433 bad_cookie = True
1434 break
1435 # RFC 2965 section 3.3.3
1436 v = v.lower()
1437 if k == "expires":
1438 if max_age_set:
1439 # Prefer max-age to expires (like Mozilla)
1440 continue
1441 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001442 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001443 "attribute: treating as session cookie")
1444 continue
1445 if k == "max-age":
1446 max_age_set = True
1447 try:
1448 v = int(v)
1449 except ValueError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001450 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001451 "max-age attribute")
1452 bad_cookie = True
1453 break
1454 # convert RFC 2965 Max-Age to seconds since epoch
1455 # XXX Strictly you're supposed to follow RFC 2616
Serhiy Storchakad65c9492015-11-02 14:10:23 +02001456 # age-calculation rules. Remember that zero Max-Age
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001457 # is a request to discard (old and new) cookie, though.
1458 k = "expires"
1459 v = self._now + v
1460 if (k in value_attrs) or (k in boolean_attrs):
1461 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001462 k not in ("port", "comment", "commenturl")):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001463 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001464 bad_cookie = True
1465 break
1466 standard[k] = v
1467 else:
1468 rest[k] = v
1469
1470 if bad_cookie:
1471 continue
1472
1473 cookie_tuples.append((name, value, standard, rest))
1474
1475 return cookie_tuples
1476
1477 def _cookie_from_cookie_tuple(self, tup, request):
1478 # standard is dict of standard cookie-attributes, rest is dict of the
1479 # rest of them
1480 name, value, standard, rest = tup
1481
1482 domain = standard.get("domain", Absent)
1483 path = standard.get("path", Absent)
1484 port = standard.get("port", Absent)
1485 expires = standard.get("expires", Absent)
1486
1487 # set the easy defaults
1488 version = standard.get("version", None)
Benjamin Peterson3e5cd1d2010-06-27 21:45:24 +00001489 if version is not None:
1490 try:
1491 version = int(version)
1492 except ValueError:
1493 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001494 secure = standard.get("secure", False)
1495 # (discard is also set if expires is Absent)
1496 discard = standard.get("discard", False)
1497 comment = standard.get("comment", None)
1498 comment_url = standard.get("commenturl", None)
1499
1500 # set default path
1501 if path is not Absent and path != "":
1502 path_specified = True
1503 path = escape_path(path)
1504 else:
1505 path_specified = False
1506 path = request_path(request)
1507 i = path.rfind("/")
1508 if i != -1:
1509 if version == 0:
1510 # Netscape spec parts company from reality here
1511 path = path[:i]
1512 else:
1513 path = path[:i+1]
1514 if len(path) == 0: path = "/"
1515
1516 # set default domain
1517 domain_specified = domain is not Absent
1518 # but first we have to remember whether it starts with a dot
1519 domain_initial_dot = False
1520 if domain_specified:
1521 domain_initial_dot = bool(domain.startswith("."))
1522 if domain is Absent:
1523 req_host, erhn = eff_request_host(request)
1524 domain = erhn
1525 elif not domain.startswith("."):
1526 domain = "."+domain
1527
1528 # set default port
1529 port_specified = False
1530 if port is not Absent:
1531 if port is None:
1532 # Port attr present, but has no value: default to request port.
1533 # Cookie should then only be sent back on that port.
1534 port = request_port(request)
1535 else:
1536 port_specified = True
1537 port = re.sub(r"\s+", "", port)
1538 else:
1539 # No port attr present. Cookie can be sent back on any port.
1540 port = None
1541
1542 # set default expires and discard
1543 if expires is Absent:
1544 expires = None
1545 discard = True
1546 elif expires <= self._now:
1547 # Expiry date in past is request to delete cookie. This can't be
1548 # in DefaultCookiePolicy, because can't delete cookies there.
1549 try:
1550 self.clear(domain, path, name)
1551 except KeyError:
1552 pass
Thomas Wouters477c8d52006-05-27 19:21:47 +00001553 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1554 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001555 return None
1556
1557 return Cookie(version,
1558 name, value,
1559 port, port_specified,
1560 domain, domain_specified, domain_initial_dot,
1561 path, path_specified,
1562 secure,
1563 expires,
1564 discard,
1565 comment,
1566 comment_url,
1567 rest)
1568
1569 def _cookies_from_attrs_set(self, attrs_set, request):
1570 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1571
1572 cookies = []
1573 for tup in cookie_tuples:
1574 cookie = self._cookie_from_cookie_tuple(tup, request)
1575 if cookie: cookies.append(cookie)
1576 return cookies
1577
Neal Norwitz71dad722005-12-23 21:43:48 +00001578 def _process_rfc2109_cookies(self, cookies):
1579 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1580 if rfc2109_as_ns is None:
1581 rfc2109_as_ns = not self._policy.rfc2965
1582 for cookie in cookies:
1583 if cookie.version == 1:
1584 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001585 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001586 # treat 2109 cookies as Netscape cookies rather than
1587 # as RFC2965 cookies
1588 cookie.version = 0
1589
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001590 def make_cookies(self, response, request):
1591 """Return sequence of Cookie objects extracted from response object."""
1592 # get cookie-attributes for RFC 2965 and Netscape protocols
1593 headers = response.info()
Barry Warsaw820c1202008-06-12 04:06:45 +00001594 rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
1595 ns_hdrs = headers.get_all("Set-Cookie", [])
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001596
1597 rfc2965 = self._policy.rfc2965
1598 netscape = self._policy.netscape
1599
1600 if ((not rfc2965_hdrs and not ns_hdrs) or
1601 (not ns_hdrs and not rfc2965) or
1602 (not rfc2965_hdrs and not netscape) or
1603 (not netscape and not rfc2965)):
1604 return [] # no relevant cookie headers: quick exit
1605
1606 try:
1607 cookies = self._cookies_from_attrs_set(
1608 split_header_words(rfc2965_hdrs), request)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001609 except Exception:
1610 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001611 cookies = []
1612
1613 if ns_hdrs and netscape:
1614 try:
Neal Norwitz71dad722005-12-23 21:43:48 +00001615 # RFC 2109 and Netscape cookies
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001616 ns_cookies = self._cookies_from_attrs_set(
1617 parse_ns_headers(ns_hdrs), request)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 except Exception:
1619 _warn_unhandled_exception()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001620 ns_cookies = []
Neal Norwitz71dad722005-12-23 21:43:48 +00001621 self._process_rfc2109_cookies(ns_cookies)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001622
1623 # Look for Netscape cookies (from Set-Cookie headers) that match
1624 # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1625 # For each match, keep the RFC 2965 cookie and ignore the Netscape
1626 # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1627 # bundled in with the Netscape cookies for this purpose, which is
1628 # reasonable behaviour.
1629 if rfc2965:
1630 lookup = {}
1631 for cookie in cookies:
1632 lookup[(cookie.domain, cookie.path, cookie.name)] = None
1633
1634 def no_matching_rfc2965(ns_cookie, lookup=lookup):
1635 key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1636 return key not in lookup
1637 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1638
1639 if ns_cookies:
1640 cookies.extend(ns_cookies)
1641
1642 return cookies
1643
def set_cookie_if_ok(self, cookie, request):
    """Store *cookie* in the jar, but only when the policy accepts it.

    The jar's and the policy's notion of "now" (whole seconds since the
    epoch) is refreshed first, then the policy's set_ok() decides whether
    set_cookie() is called.  Everything runs while holding the jar's
    cookie lock.

    """
    with self._cookies_lock:
        timestamp = int(time.time())
        # Policy first, then the jar -- both must see the same instant.
        self._policy._now = timestamp
        self._now = timestamp

        if not self._policy.set_ok(cookie, request):
            return
        self.set_cookie(cookie)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001656
def set_cookie(self, cookie):
    """Store *cookie* unconditionally (no policy check is made).

    Cookies are kept in a three-level mapping: domain -> path -> name.
    Missing levels are created on demand; an existing cookie with the same
    domain, path and name is replaced.

    """
    jar = self._cookies
    with self._cookies_lock:
        by_path = jar.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
1669
def extract_cookies(self, response, request):
    """Pull cookies out of *response* and keep those the policy allows.

    Candidate cookies come from make_cookies(); each one is stored with
    set_cookie() only if the policy's set_ok() accepts it for *request*.
    The jar's and the policy's "now" are refreshed beforehand, and the
    whole extraction runs while holding the cookie lock.

    """
    _debug("extract_cookies: %s", response.info())
    with self._cookies_lock:
        current = int(time.time())
        self._policy._now = current
        self._now = current

        for candidate in self.make_cookies(response, request):
            if not self._policy.set_ok(candidate, request):
                continue
            _debug(" setting cookie: %s", candidate)
            self.set_cookie(candidate)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001683
def clear(self, domain=None, path=None, name=None):
    """Remove cookies from the jar.

    With no arguments every cookie is dropped.  With *domain* only, all
    cookies for that domain go; with *domain* and *path*, all cookies
    under that path of the domain; with all three arguments, exactly the
    cookie identified by domain, path and name.

    Raises ValueError if *name* is given without both *domain* and
    *path*, or *path* is given without *domain*.
    Raises KeyError if no matching cookie exists.

    """
    # Most specific request first: a single named cookie.
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    # Everything below one path of a domain.
    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    # Everything belonging to one domain.
    if domain is not None:
        del self._cookies[domain]
        return

    # No selector at all: start over with an empty store.
    self._cookies = {}
1710
def clear_session_cookies(self):
    """Drop every cookie whose discard flag is set (session cookies).

    Note that the .save() method won't save session cookies anyway, unless
    you ask otherwise by passing a true ignore_discard argument.

    """
    with self._cookies_lock:
        for candidate in self:
            if not candidate.discard:
                continue
            self.clear(candidate.domain, candidate.path, candidate.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001725
def clear_expired_cookies(self):
    """Drop every cookie that reports itself as expired right now.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    with self._cookies_lock:
        # One timestamp for the whole sweep so all cookies are judged alike.
        reference_time = time.time()
        for candidate in self:
            if not candidate.is_expired(reference_time):
                continue
            self.clear(candidate.domain, candidate.path, candidate.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001744
def __iter__(self):
    """Iterate over the cookies held in the jar.

    Delegates to deepvalues() applied to the nested cookie store.

    """
    cookie_store = self._cookies
    return deepvalues(cookie_store)
1747
def __len__(self):
    """Return number of contained cookies."""
    # Counted by walking the jar's own iterator.
    return sum(1 for _ in self)
1753
def __repr__(self):
    """Return "<ClassName[...]>" listing the repr() of every cookie."""
    listing = ", ".join([repr(item) for item in self])
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001758
def __str__(self):
    """Return "<ClassName[...]>" listing the str() of every cookie."""
    listing = ", ".join([str(item) for item in self])
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001763
1764
# Subclasses OSError to stay backwards-compatible with Python 2.4.0.
class LoadError(OSError):
    """Raised when a cookie file cannot be loaded."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001767
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This base class resolves the file name and opens the file; the actual
    parsing is delegated to a _really_load() method and save() must be
    overridden, so only subclasses are directly usable.

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename: default file used by load()/revert() (and by save() in
            subclasses); any path-like object is converted with os.fspath().
        delayload: stored as a bool in self.delayload.
        policy: passed through to CookieJar.__init__().

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            filename = os.fspath(filename)
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here; always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        *filename* defaults to self.filename; ValueError is raised when
        neither is available.  The opened file is handed to _really_load()
        together with the ignore_discard / ignore_expires flags.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.  The same
        rollback is applied for any other exception escaping load(); the
        exception is then re-raised unchanged.

        Raises ValueError if no filename is given and none was set on the
        jar.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with self._cookies_lock:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except BaseException:
                # Roll back on *any* failure (not only OSError) so a failed
                # or interrupted load never leaves the jar empty or
                # half-filled; the original exception still propagates.
                self._cookies = old_state
                raise
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001821
Georg Brandl7c9b61b2008-05-26 17:56:51 +00001822
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    The cookie is flattened into one list of (key, value) pairs -- the
    name/value pair first, "version" last -- and serialised with
    join_header_words().  Flags are emitted as keys with a None value.
    The format is slightly extended compared with plain LWP.

    """
    pairs = [
        (cookie.name, cookie.value),
        ("path", cookie.path),
        ("domain", cookie.domain),
    ]
    if cookie.port is not None:
        pairs.append(("port", cookie.port))

    # Value-less flags, in the order they appear in the output.
    for key, enabled in (
        ("path_spec", cookie.path_specified),
        ("port_spec", cookie.port_specified),
        ("domain_dot", cookie.domain_initial_dot),
        ("secure", cookie.secure),
    ):
        if enabled:
            pairs.append((key, None))

    if cookie.expires:
        pairs.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        pairs.append(("discard", None))

    for key, text in (
        ("comment", cookie.comment),
        ("commenturl", cookie.comment_url),
    ):
        if text:
            pairs.append((key, text))

    # Non-standard attributes, in sorted key order, stringified.
    pairs.extend(
        (extra, str(cookie._rest[extra]))
        for extra in sorted(cookie._rest.keys())
    )

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])
1850
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        """
        now = time.time()
        lines = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            lines.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # The trailing empty element makes the result end with a newline
        # whenever at least one cookie was written.
        return "\n".join(lines + [""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to a file in Set-Cookie3 format.

        *filename* defaults to self.filename; ValueError is raised when
        neither is available.  A newly created file gets mode 0o600 because
        cookies frequently carry credentials; an existing file is truncated
        and keeps its current permission bits.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse Set-Cookie3 lines from the open file *f* into the jar.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Lines not starting with "Set-Cookie3:" are skipped.
        Cookies flagged discard / already expired are dropped unless the
        corresponding ignore_* flag is true.  OSError propagates unchanged;
        any other error while reading or parsing is reported through
        _warn_unhandled_exception() and converted to LoadError.

        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Bound before the try block so the error handler below can always
        # format its message, even if the very first readline() fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "":
                    break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        lc = k.lower() if k is not None else None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            # a bare flag (no value) means "true"
                            if v is None:
                                v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        # no (usable) expiry date: treat as session cookie
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1969
1970
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # First line of an acceptable cookies file.
    magic_re = re.compile("#( Netscape)? HTTP Cookie File")
    # Written verbatim at the top of every saved file.
    header = """\
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse a Netscape-format cookies file from the open file *f*.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Each remaining non-comment, non-blank line must consist of
        seven tab-separated fields: domain, domain_specified, path, secure,
        expires, name, value.  Cookies flagged discard / already expired
        are dropped unless the corresponding ignore_* flag is true.
        OSError propagates unchanged; any other error while reading or
        parsing is reported through _warn_unhandled_exception() and
        converted to LoadError.

        """
        now = time.time()

        magic = f.readline()
        if not self.magic_re.search(magic):
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Bound before the try block so the error handler below can always
        # format its message, even if the very first readline() fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "":
                    break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if stripped.startswith(("#", "$")) or stripped == "":
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check rather than `assert`, so the file is still
                # validated when Python runs with -O.  The handler below
                # turns this into LoadError like any other parse failure.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain_specified flag %r disagrees with domain %r" %
                        (domain_specified, domain))

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to a file in Netscape cookies.txt format.

        *filename* defaults to self.filename; ValueError is raised when
        neither is available.  A newly created file gets mode 0o600 because
        cookies frequently carry credentials; an existing file is truncated
        and keeps its current permission bits.  Cookies flagged discard /
        already expired are skipped unless the corresponding ignore_* flag
        is true.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = "TRUE" if cookie.secure else "FALSE"
                initial_dot = "TRUE" if cookie.domain.startswith(".") else "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")