blob: eaa76c26b9c591a6a5f5ea64cd5348e430bd7fad [file] [log] [blame]
r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27
# Names exported by ``from <this module> import *``.
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
30
Stéphane Wirtel4b219ce2019-03-01 21:40:54 +010031import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000032import copy
Victor Stinner628225c2011-03-21 02:38:51 +010033import datetime
Jeremy Hylton1afc1692008-06-18 20:49:58 +000034import re
35import time
36import urllib.parse, urllib.request
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020037import threading as _threading
Georg Brandl24420152008-05-26 16:32:26 +000038import http.client # only for the default HTTP port
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000039from calendar import timegm
40
Thomas Wouters477c8d52006-05-27 19:21:47 +000041debug = False # set to True to enable debugging via the logging module
42logger = None
43
44def _debug(*args):
45 if not debug:
46 return
47 global logger
48 if not logger:
49 import logging
Georg Brandl24420152008-05-26 16:32:26 +000050 logger = logging.getLogger("http.cookiejar")
Thomas Wouters477c8d52006-05-27 19:21:47 +000051 return logger.debug(*args)
52
# Name of the HttpOnly cookie-attribute, and the line prefix that marks such
# a cookie in a Netscape-format cookie file.
HTTPONLY_ATTR = "HTTPOnly"
HTTPONLY_PREFIX = "#HttpOnly_"
# Default HTTP port, kept as a string.
DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
# Matches the magic first line of a Netscape-format cookie file; the word
# "Netscape" is optional.
NETSCAPE_MAGIC_RGX = re.compile(r"#( Netscape)? HTTP Cookie File")
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
# Header text for a Netscape-format cookie file.
NETSCAPE_HEADER_TEXT = """\
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

"""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000065
Thomas Wouters477c8d52006-05-27 19:21:47 +000066def _warn_unhandled_exception():
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000067 # There are a few catch-all except: statements in this module, for
Thomas Wouters477c8d52006-05-27 19:21:47 +000068 # catching input that's bad in unexpected ways. Warn if any
69 # exceptions are caught there.
Jeremy Hylton7ecf3dc2008-05-10 20:38:40 +000070 import io, warnings, traceback
Guido van Rossum34d19282007-08-09 01:03:29 +000071 f = io.StringIO()
Andrew M. Kuchlingae40c2f2004-07-10 18:32:12 +000072 traceback.print_exc(None, f)
73 msg = f.getvalue()
Georg Brandl24420152008-05-26 16:32:26 +000074 warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +000075
76
77# Date/time conversion
78# -----------------------------------------------------------------------------
79
80EPOCH_YEAR = 1970
81def _timegm(tt):
82 year, month, mday, hour, min, sec = tt[:6]
83 if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
84 (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
85 return timegm(tt)
86 else:
87 return None
88
# Abbreviated day and month names, indexed by datetime.weekday() and by
# (month number - 1) respectively.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names, for case-insensitive lookup.  Built with a
# comprehension so no loop variable is left in the module namespace.
MONTHS_LOWER = [month.lower() for month in MONTHS]
94
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Use timezone-aware UTC datetimes; the naive utcnow() and
    # utcfromtimestamp() constructors are deprecated.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000113
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Use timezone-aware UTC datetimes; the naive utcnow() and
    # utcfromtimestamp() constructors are deprecated.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000132
133
# Timezone names that mean "zero offset"; only the keys are used.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric offset: optional sign, one or two hour digits, optional colon,
# optional two minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)


def offset_from_tz_string(tz):
    """Return the UTC offset in seconds described by *tz*, or None.

    *tz* is either one of the names in UTC_ZONES (offset 0) or a numeric
    offset such as "+0100", "-08:00" or "5".  The lookup is case-sensitive.
    None is returned for anything else.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    sign, hours, minutes = m.groups()
    offset = 3600 * int(hours)
    if minutes:
        offset += 60 * int(minutes)
    return -offset if sign == '-' else offset
150
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date/time strings to seconds since the epoch.

    All arguments are strings as captured by the date regular expressions;
    hr, min, sec and tz may be None.  mon may be a month abbreviation (any
    case) or a month number.  Returns None if the year is beyond
    datetime.MAXYEAR, the month is not recognised, a field is out of range
    (see _timegm) or the timezone is not recognised.  A missing timezone
    is treated as UTC.
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # Seconds may carry a fractional part (e.g. "29.5" from an ISO 8601
    # string); truncate it instead of letting int() raise ValueError.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the year closest
        # to the current one
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec))
    if t is None:
        return None

    # adjust time using timezone string, to get absolute time since epoch
    if tz is None:
        tz = "UTC"
    offset = offset_from_tz_string(tz.upper())
    if offset is None:
        return None
    return t - offset
206
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    (?:
       ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone
       \s*
    )?
    (?:
       \(\w+\)         # ASCII representation of timezone in parens.
       \s*
    )?$""", re.X | re.ASCII)


def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        day, mon_name, yr, hr, mins, sec = m.groups()
        try:
            mon = MONTHS_LOWER.index(mon_name.lower()) + 1
        except ValueError:
            # Looked like a month ("Foo") but is not one: unrecognized
            # format, as documented, rather than an exception.
            return None
        # The pattern guarantees plain digits, so int() keeps the result
        # an integer as documented.
        return _timegm((int(yr), mon, int(day), int(hr), int(mins), int(sec)))

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, count=1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # groups are: day, month, year, hour, minute, second, timezone string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
288
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   (?:
      ([-+]?\d\d?:?(:?\d\d)?
       |Z|z)             # timezone  (Z is "zero meridian", i.e. GMT)
      \s*
   )?$""", re.X | re.ASCII)


def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    # tz is the time zone specifier string; the trailing group is the
    # nested minutes part of the timezone pattern.
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
335
336
337# Header parsing
338# -----------------------------------------------------------------------------
339
def unmatched(match):
    """Return unmatched part of re.Match object.

    That is the searched string with the span of the whole match removed:
    the text before the match joined to the text after it.
    """
    text = match.string
    return text[:match.start()] + text[match.end():]
344
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
# Leading run of "=", ";" and whitespace that separates parameters.
HEADER_JUNK_RE = re.compile(r"^[=\s;]*")


def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    header_values must be an iterable of strings, not a single string.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            # Every pattern used here is anchored at "^", so a match always
            # starts at index 0 and dropping it is a plain slice.
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = text[m.end():]
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = text[m.end():]
                    # undo backslash escapes inside the quoted string
                    value = HEADER_ESCAPE_RE.sub(r"\1", m.group(1))
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = text[m.end():]
                        value = m.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk; this must consume something or the loop would
                # never terminate
                non_junk = HEADER_JUNK_RE.sub("", text)
                assert len(non_junk) < len(text), (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
433
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
# A value made only of word characters can be emitted without quotes.
HEADER_JOIN_TOKEN_RE = re.compile(r"^\w+$")


def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.  Pairs within one inner
    list are joined with "; ", inner lists with ", "; a value of None
    produces a bare key, and empty inner lists are skipped.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                if not HEADER_JOIN_TOKEN_RE.search(v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)
459
def strip_quotes(text):
    """Return *text* without one leading and one trailing double quote.

    Each end is handled independently, so an unbalanced quote is removed
    too.
    """
    if text.startswith('"'):
        text = text[1:]
    if text.endswith('"'):
        text = text[:-1]
    return text
466
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns one list of (key, value) pairs per header that yielded any
    pairs.  The first pair is the cookie's own name and value, kept
    verbatim; later keys are lower-cased when they are known
    cookie-attributes.  A value is None when the parameter had no "=".
    The "expires" value is converted with http2time, and a
    ("version", "0") pair is appended when the header set no version.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False

        # XXX: The following does not strictly adhere to RFCs in that empty
        # names and values are legal (the former will only appear once and will
        # be overwritten if multiple occurrences are present). This is
        # mostly to deal with backwards compatibility.
        for ii, param in enumerate(ns_header.split(';')):
            param = param.strip()

            key, sep, val = param.partition('=')
            key = key.strip()

            if not key:
                if ii == 0:
                    # no cookie name at all: give up on this header
                    break
                # empty attribute (e.g. ";;"): ignore it
                continue

            # allow for a distinction between present and empty and missing
            # altogether
            val = val.strip() if sep else None

            if ii != 0:
                # only attributes (not the cookie name) are normalised
                lc = key.lower()
                if lc in known_attrs:
                    key = lc

                if key == "version":
                    # This is an RFC 2109 cookie.
                    if val is not None:
                        val = strip_quotes(val)
                    version_set = True
                elif key == "expires":
                    # convert expires date to seconds since epoch
                    if val is not None:
                        val = http2time(strip_quotes(val))  # None if invalid
            pairs.append((key, val))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
533
534
# Matches text ending in a dot followed by digits, which is how this module
# recognises a dotted IPv4 address.
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)


def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    # a host domain name neither starts nor ends with a dot
    return not (text[0] == "." or text[-1] == ".")
550
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # A must have the form NB, i.e. B must be a suffix of A.  Searching for
    # B anywhere inside A would wrongly accept "a.c.com.evil.org" for
    # ".c.com".  N is non-empty because A != B was established above.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
589
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.

    Anything that does not look like an IPv4 address (per IPV4_RE) counts.

    """
    return not IPV4_RE.search(text)
599
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    The comparison is case-insensitive.  If either looks like an IP
    address, only an exact match counts.  Otherwise a B with a leading dot
    matches any A ending with B, and a B without one must equal A.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # equal IP addresses match, nothing else does
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
619
# Trailing ":<digits>" port suffix of a host string.
cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host is the network location of the request's full URL; when that
    is empty, the request's "Host" header is used instead (or "" if there
    is none).  A trailing numeric port is removed.

    """
    url = request.get_full_url()
    host = urllib.parse.urlparse(url).netloc
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, count=1)
    return host.lower()
636
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    The effective name is the request-host with ".local" appended when the
    request-host contains no dot and does not look like an IPv4 address;
    otherwise it is the request-host itself.

    """
    req_host = request_host(request)
    erhn = req_host
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
647
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path is taken from the request's full URL and passed through
    escape_path.  A "/" is prepended if the result does not start with one.
    """
    url = request.get_full_url()
    path = escape_path(urllib.parse.urlsplit(url).path)
    if not path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        path = "/" + path
    return path
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000657
def request_port(request):
    """Return the port of *request* as a string, or None if it is invalid.

    The port is read from ``request.host``.  DEFAULT_HTTP_PORT is returned
    when the host carries no port.  None is returned (and a debug message
    logged) when the text after the port separator is not an integer.
    """
    host = request.host
    # Skip past a bracketed IPv6 literal ("[::1]:8080") so the colons inside
    # the address are not mistaken for the port separator.  If there is no
    # closing bracket, find() yields -1 and the search starts at 0 as usual.
    search_from = host.find(']') + 1 if host.startswith('[') else 0
    i = host.find(':', search_from)
    if i >= 0:
        port = host[i+1:]
        try:
            int(port)
        except ValueError:
            _debug("nonnumeric port: '%s'", port)
            return None
    else:
        port = DEFAULT_HTTP_PORT
    return port
671
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Return the %XX escape captured by *match* with upper-case hex digits."""
    return "%%%s" % match.group(1).upper()


def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    # "%" is in the safe set, so escapes already present are not re-quoted.
    quoted = urllib.parse.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
691
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: a is everything before it, b everything after.
    _a, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
726
def is_third_party(request):
    """Return True if *request* is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    return not domain_match(req_host, reach(request.origin_req_host))
742
743
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version is converted with int() and expires with int(float())
        unless they are None; domain is lower-cased; rest (a mapping of
        nonstandard cookie-attributes) is shallow-copied.  Raises
        ValueError if port is None while port_specified is True.
        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(float(expires))
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # private copy, so later set_nonstandard_attr() calls do not touch
        # the caller's mapping
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the nonstandard cookie-attribute *name* is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the nonstandard cookie-attribute *name*, or *default*."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the nonstandard cookie-attribute *name* to *value*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= *now*.

        *now* defaults to the current time.  A cookie whose expires is None
        never counts as expired.
        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(args))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000840
841
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customized policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
873
874
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    set_ok() and return_ok() each run a fixed sequence of set_ok_<aspect>() /
    return_ok_<aspect>() methods (looked up by name on the instance) and stop
    at the first check that fails.
    """

    # Bit flags for the strict_ns_domain setting (combine with "|").
    # DomainStrictNoDots: set_ok_domain() rejects a Netscape cookie when the
    #   part of the request host in front of the cookie domain contains a dot.
    # DomainStrictNonDomain: return_ok_domain() returns a Netscape cookie set
    #   without a domain attribute only to a host equal to the cookie domain.
    # DomainRFC2965Match: set_ok_domain() also applies domain_match() to
    #   Netscape cookies.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 secure_protocols=("https", "wss")
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path
        self.secure_protocols = secure_protocols

        # Both lists are stored as tuples; None for the allow-list means
        # "no allow-list", i.e. every domain is allowed.
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains

    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches an entry of the block-list."""
        return any(user_domain_match(domain, blocked_domain)
                   for blocked_domain in self._blocked_domains)

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains

    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches none of it."""
        if self._allowed_domains is None:
            return False
        return not any(user_domain_match(domain, allowed_domain)
                       for allowed_domain in self._allowed_domains)

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        # Checks are resolved by name so that subclasses can override any
        # individual set_ok_<aspect>() method.
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn = getattr(self, "set_ok_" + n)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies with no version or of a switched-off protocol."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def _third_party_ok(self, cookie, request):
        # Shared body of set_ok_verifiability() and return_ok_verifiability():
        # false for a third-party cookie in an unverifiable transaction when
        # the strict_*_unverifiable switch for the cookie's protocol is on.
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Apply the third-party / unverifiable-transaction rule on setting."""
        return self._third_party_ok(cookie, request)

    def set_ok_name(self, cookie, request):
        """Optionally reject Netscape cookies whose name starts with '$'."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Require an explicit cookie path to path-match the request path.

        Always enforced for version > 0 cookies; for Netscape cookies only
        when strict_ns_set_path is true.
        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not self.path_return_ok(cookie.path, request)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Check the cookie domain against block/allow lists and request host.

        Cookies without an explicit domain attribute only go through the
        block-list and allow-list checks.
        """
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if not cookie.domain_specified:
            return True

        req_host, erhn = eff_request_host(request)
        domain = cookie.domain
        if self.strict_domain and (domain.count(".") >= 2):
            # XXX This should probably be compared with the Konqueror
            # (kcookiejar.cpp) and Mozilla implementations, but it's a
            # losing battle.
            i = domain.rfind(".")
            j = domain.rfind(".", 0, i)
            if j == 0:  # domain like .foo.bar
                tld = domain[i+1:]
                sld = domain[j+1:i]
                if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                   "gov", "mil", "int", "aero", "biz", "cat", "coop",
                   "info", "jobs", "mobi", "museum", "name", "pro",
                   "travel", "eu") and len(tld) == 2:
                    # domain like .co.uk
                    _debug(" country-code second level domain %s", domain)
                    return False
        if domain.startswith("."):
            undotted_domain = domain[1:]
        else:
            undotted_domain = domain
        if "." not in undotted_domain and domain != ".local":
            _debug(" non-local domain %s contains no embedded dot",
                   domain)
            return False
        if cookie.version == 0:
            if (not erhn.endswith(domain) and
                (not erhn.startswith(".") and
                 not ("."+erhn).endswith(domain))):
                _debug(" effective request-host %s (even with added "
                       "initial dot) does not end with %s",
                       erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainRFC2965Match)):
            if not domain_match(erhn, domain):
                _debug(" effective request-host %s does not domain-match "
                       "%s", erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainStrictNoDots)):
            # What is left of the request host once the cookie domain is
            # cut off its end must be a single label (IP addresses exempt).
            host_prefix = req_host[:-len(domain)]
            if ("." in host_prefix and
                not IPV4_RE.search(req_host)):
                _debug(" host prefix %s for domain %s contains a dot",
                       host_prefix, domain)
                return False
        return True

    def set_ok_port(self, cookie, request):
        """Require the request port to appear in an explicit cookie port list.

        A list entry that is not numeric rejects the cookie, unless a
        matching entry was already found earlier in the list.
        """
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn = getattr(self, "return_ok_" + n)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse to return cookies of a switched-off protocol."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Apply the third-party / unverifiable-transaction rule on return."""
        return self._third_party_ok(cookie, request)

    def return_ok_secure(self, cookie, request):
        """Return secure cookies only for request types in secure_protocols."""
        if cookie.secure and request.type not in self.secure_protocols:
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse to return cookies that have expired as of self._now.

        self._now is not set by this class; CookieJar assigns it on the
        policy before running the checks.
        """
        if cookie.is_expired(self._now):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Require the request port to appear in the cookie's port list."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                # Normalise exactly like set_ok_port() so the comparison
                # against the (string) port list entries cannot miss.
                req_port = str(req_port)
            if req_port not in cookie.port.split(","):
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check that the request host is covered by the cookie domain."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Return false if no cookie stored under domain may be returned."""
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain
        if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
            # Rejected without a debug message.
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return true if the request path path-matches the cookie path.

        That is: the two are equal, or the request path starts with path and
        the match ends on a "/" boundary.
        """
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        pathlen = len(path)
        if req_path == path:
            return True
        elif (req_path.startswith(path) and
              (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
            return True

        _debug(" %s does not path-match %s", req_path, path)
        return False
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001226
def vals_sorted_by_key(adict):
    """Return an iterator over adict's values, ordered by ascending key."""
    ordered_keys = list(adict.keys())
    ordered_keys.sort()
    return map(adict.get, ordered_keys)
1230
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    A value counts as a nested mapping when it has an "items" attribute; such
    values are descended into rather than yielded themselves.
    """
    for item in vals_sorted_by_key(mapping):
        if hasattr(item, "items"):
            yield from deepvalues(item)
        else:
            yield item
1245
1246
# Sentinel handed to dict.get() as its second argument, so that a key which
# is missing can be told apart from a key stored with a None value.
class Absent:
    """Marker class; only ever compared by identity, never instantiated here."""
1250
1251class CookieJar:
1252 """Collection of HTTP cookies.
1253
1254 You may not need to know about this class: try
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001255 urllib.request.build_opener(HTTPCookieProcessor).open(url).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001256 """
1257
1258 non_word_re = re.compile(r"\W")
1259 quote_re = re.compile(r"([\"\\])")
1260 strict_domain_re = re.compile(r"\.?[^.]*")
1261 domain_re = re.compile(r"[^.]*")
1262 dots_re = re.compile(r"^\.+")
1263
Antoine Pitroufd036452008-08-19 17:56:33 +00001264 magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001265
1266 def __init__(self, policy=None):
1267 if policy is None:
1268 policy = DefaultCookiePolicy()
1269 self._policy = policy
1270
1271 self._cookies_lock = _threading.RLock()
1272 self._cookies = {}
1273
    def set_policy(self, policy):
        """Replace the policy object consulted when accepting/returning cookies."""
        self._policy = policy
1276
1277 def _cookies_for_domain(self, domain, request):
1278 cookies = []
1279 if not self._policy.domain_return_ok(domain, request):
1280 return []
Thomas Wouters477c8d52006-05-27 19:21:47 +00001281 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001282 cookies_by_path = self._cookies[domain]
1283 for path in cookies_by_path.keys():
1284 if not self._policy.path_return_ok(path, request):
1285 continue
1286 cookies_by_name = cookies_by_path[path]
1287 for cookie in cookies_by_name.values():
1288 if not self._policy.return_ok(cookie, request):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001289 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001290 continue
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001292 cookies.append(cookie)
1293 return cookies
1294
1295 def _cookies_for_request(self, request):
1296 """Return a list of cookies to be returned to server."""
1297 cookies = []
1298 for domain in self._cookies.keys():
1299 cookies.extend(self._cookies_for_domain(domain, request))
1300 return cookies
1301
1302 def _cookie_attrs(self, cookies):
1303 """Return a list of cookie-attributes to be returned to server.
1304
1305 like ['foo="bar"; $Path="/"', ...]
1306
1307 The $Version attribute is also added when appropriate (currently only
1308 once per request).
1309
1310 """
1311 # add cookies in order of most specific (ie. longest) path first
Raymond Hettinger70b64fc2008-01-30 20:15:17 +00001312 cookies.sort(key=lambda a: len(a.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001313
1314 version_set = False
1315
1316 attrs = []
1317 for cookie in cookies:
1318 # set version of Cookie header
1319 # XXX
1320 # What should it be if multiple matching Set-Cookie headers have
1321 # different versions themselves?
1322 # Answer: there is no answer; was supposed to be settled by
1323 # RFC 2965 errata, but that may never appear...
1324 version = cookie.version
1325 if not version_set:
1326 version_set = True
1327 if version > 0:
1328 attrs.append("$Version=%s" % version)
1329
1330 # quote cookie value if necessary
1331 # (not for Netscape protocol, which already has any quotes
1332 # intact, due to the poorly-specified Netscape Cookie: syntax)
1333 if ((cookie.value is not None) and
1334 self.non_word_re.search(cookie.value) and version > 0):
1335 value = self.quote_re.sub(r"\\\1", cookie.value)
1336 else:
1337 value = cookie.value
1338
1339 # add cookie-attributes to be returned in Cookie header
1340 if cookie.value is None:
1341 attrs.append(cookie.name)
1342 else:
1343 attrs.append("%s=%s" % (cookie.name, value))
1344 if version > 0:
1345 if cookie.path_specified:
1346 attrs.append('$Path="%s"' % cookie.path)
1347 if cookie.domain.startswith("."):
1348 domain = cookie.domain
1349 if (not cookie.domain_initial_dot and
1350 domain.startswith(".")):
1351 domain = domain[1:]
1352 attrs.append('$Domain="%s"' % domain)
1353 if cookie.port is not None:
1354 p = "$Port"
1355 if cookie.port_specified:
1356 p = p + ('="%s"' % cookie.port)
1357 attrs.append(p)
1358
1359 return attrs
1360
1361 def add_cookie_header(self, request):
Georg Brandl029986a2008-06-23 11:44:14 +00001362 """Add correct Cookie: header to request (urllib.request.Request object).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001363
1364 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1365
1366 """
Thomas Wouters477c8d52006-05-27 19:21:47 +00001367 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001368 self._cookies_lock.acquire()
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001369 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001370
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001371 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001372
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001373 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001374
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001375 attrs = self._cookie_attrs(cookies)
1376 if attrs:
1377 if not request.has_header("Cookie"):
1378 request.add_unredirected_header(
1379 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001380
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001381 # if necessary, advertise that we know RFC 2965
1382 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1383 not request.has_header("Cookie2")):
1384 for cookie in cookies:
1385 if cookie.version != 1:
1386 request.add_unredirected_header("Cookie2", '$Version="1"')
1387 break
1388
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001389 finally:
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001390 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001391
1392 self.clear_expired_cookies()
1393
1394 def _normalized_cookie_tuples(self, attrs_set):
1395 """Return list of tuples containing normalised cookie information.
1396
1397 attrs_set is the list of lists of key,value pairs extracted from
1398 the Set-Cookie or Set-Cookie2 headers.
1399
1400 Tuples are name, value, standard, rest, where name and value are the
1401 cookie name and value, standard is a dictionary containing the standard
1402 cookie-attributes (discard, secure, version, expires or max-age,
1403 domain, path and port) and rest is a dictionary containing the rest of
1404 the cookie-attributes.
1405
1406 """
1407 cookie_tuples = []
1408
1409 boolean_attrs = "discard", "secure"
1410 value_attrs = ("version",
1411 "expires", "max-age",
1412 "domain", "path", "port",
1413 "comment", "commenturl")
1414
1415 for cookie_attrs in attrs_set:
1416 name, value = cookie_attrs[0]
1417
1418 # Build dictionary of standard cookie-attributes (standard) and
1419 # dictionary of other cookie-attributes (rest).
1420
1421 # Note: expiry time is normalised to seconds since epoch. V0
1422 # cookies should have the Expires cookie-attribute, and V1 cookies
1423 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1424 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1425 # accept either (but prefer Max-Age).
1426 max_age_set = False
1427
1428 bad_cookie = False
1429
1430 standard = {}
1431 rest = {}
1432 for k, v in cookie_attrs[1:]:
1433 lc = k.lower()
1434 # don't lose case distinction for unknown fields
1435 if lc in value_attrs or lc in boolean_attrs:
1436 k = lc
1437 if k in boolean_attrs and v is None:
1438 # boolean cookie-attribute is present, but has no value
1439 # (like "discard", rather than "port=80")
1440 v = True
1441 if k in standard:
1442 # only first value is significant
1443 continue
1444 if k == "domain":
1445 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001446 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001447 bad_cookie = True
1448 break
1449 # RFC 2965 section 3.3.3
1450 v = v.lower()
1451 if k == "expires":
1452 if max_age_set:
1453 # Prefer max-age to expires (like Mozilla)
1454 continue
1455 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001456 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001457 "attribute: treating as session cookie")
1458 continue
1459 if k == "max-age":
1460 max_age_set = True
1461 try:
1462 v = int(v)
1463 except ValueError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001464 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001465 "max-age attribute")
1466 bad_cookie = True
1467 break
1468 # convert RFC 2965 Max-Age to seconds since epoch
1469 # XXX Strictly you're supposed to follow RFC 2616
Serhiy Storchakad65c9492015-11-02 14:10:23 +02001470 # age-calculation rules. Remember that zero Max-Age
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001471 # is a request to discard (old and new) cookie, though.
1472 k = "expires"
1473 v = self._now + v
1474 if (k in value_attrs) or (k in boolean_attrs):
1475 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001476 k not in ("port", "comment", "commenturl")):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001477 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001478 bad_cookie = True
1479 break
1480 standard[k] = v
1481 else:
1482 rest[k] = v
1483
1484 if bad_cookie:
1485 continue
1486
1487 cookie_tuples.append((name, value, standard, rest))
1488
1489 return cookie_tuples
1490
1491 def _cookie_from_cookie_tuple(self, tup, request):
1492 # standard is dict of standard cookie-attributes, rest is dict of the
1493 # rest of them
1494 name, value, standard, rest = tup
1495
1496 domain = standard.get("domain", Absent)
1497 path = standard.get("path", Absent)
1498 port = standard.get("port", Absent)
1499 expires = standard.get("expires", Absent)
1500
1501 # set the easy defaults
1502 version = standard.get("version", None)
Benjamin Peterson3e5cd1d2010-06-27 21:45:24 +00001503 if version is not None:
1504 try:
1505 version = int(version)
1506 except ValueError:
1507 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001508 secure = standard.get("secure", False)
1509 # (discard is also set if expires is Absent)
1510 discard = standard.get("discard", False)
1511 comment = standard.get("comment", None)
1512 comment_url = standard.get("commenturl", None)
1513
1514 # set default path
1515 if path is not Absent and path != "":
1516 path_specified = True
1517 path = escape_path(path)
1518 else:
1519 path_specified = False
1520 path = request_path(request)
1521 i = path.rfind("/")
1522 if i != -1:
1523 if version == 0:
1524 # Netscape spec parts company from reality here
1525 path = path[:i]
1526 else:
1527 path = path[:i+1]
1528 if len(path) == 0: path = "/"
1529
1530 # set default domain
1531 domain_specified = domain is not Absent
1532 # but first we have to remember whether it starts with a dot
1533 domain_initial_dot = False
1534 if domain_specified:
1535 domain_initial_dot = bool(domain.startswith("."))
1536 if domain is Absent:
1537 req_host, erhn = eff_request_host(request)
1538 domain = erhn
1539 elif not domain.startswith("."):
1540 domain = "."+domain
1541
1542 # set default port
1543 port_specified = False
1544 if port is not Absent:
1545 if port is None:
1546 # Port attr present, but has no value: default to request port.
1547 # Cookie should then only be sent back on that port.
1548 port = request_port(request)
1549 else:
1550 port_specified = True
1551 port = re.sub(r"\s+", "", port)
1552 else:
1553 # No port attr present. Cookie can be sent back on any port.
1554 port = None
1555
1556 # set default expires and discard
1557 if expires is Absent:
1558 expires = None
1559 discard = True
1560 elif expires <= self._now:
1561 # Expiry date in past is request to delete cookie. This can't be
1562 # in DefaultCookiePolicy, because can't delete cookies there.
1563 try:
1564 self.clear(domain, path, name)
1565 except KeyError:
1566 pass
Thomas Wouters477c8d52006-05-27 19:21:47 +00001567 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1568 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001569 return None
1570
1571 return Cookie(version,
1572 name, value,
1573 port, port_specified,
1574 domain, domain_specified, domain_initial_dot,
1575 path, path_specified,
1576 secure,
1577 expires,
1578 discard,
1579 comment,
1580 comment_url,
1581 rest)
1582
1583 def _cookies_from_attrs_set(self, attrs_set, request):
1584 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1585
1586 cookies = []
1587 for tup in cookie_tuples:
1588 cookie = self._cookie_from_cookie_tuple(tup, request)
1589 if cookie: cookies.append(cookie)
1590 return cookies
1591
Neal Norwitz71dad722005-12-23 21:43:48 +00001592 def _process_rfc2109_cookies(self, cookies):
1593 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1594 if rfc2109_as_ns is None:
1595 rfc2109_as_ns = not self._policy.rfc2965
1596 for cookie in cookies:
1597 if cookie.version == 1:
1598 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001599 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001600 # treat 2109 cookies as Netscape cookies rather than
1601 # as RFC2965 cookies
1602 cookie.version = 0
1603
def make_cookies(self, response, request):
    """Return the Cookie objects parsed out of *response*'s headers.

    Both the Set-Cookie2 (RFC 2965) and Set-Cookie (Netscape / RFC 2109)
    headers are consulted; which of them are honoured depends on the
    ``rfc2965`` and ``netscape`` switches of the jar's policy.
    """
    message = response.info()
    rfc2965_hdrs = message.get_all("Set-Cookie2", [])
    ns_hdrs = message.get_all("Set-Cookie", [])
    # keep the jar's and the policy's idea of the current time in step
    self._policy._now = self._now = int(time.time())

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # Quick exit unless at least one protocol is both switched on and
    # actually represented by a header in this response.
    if not ((rfc2965_hdrs and rfc2965) or (ns_hdrs and netscape)):
        return []

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        cookies = []

    if not (ns_hdrs and netscape):
        return cookies

    try:
        # RFC 2109 and Netscape cookies
        ns_cookies = self._cookies_from_attrs_set(
            parse_ns_headers(ns_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        ns_cookies = []
    self._process_rfc2109_cookies(ns_cookies)

    # A Netscape cookie (from Set-Cookie) that has a counterpart among the
    # RFC 2965 cookies (from Set-Cookie2) is dropped in favour of the
    # RFC 2965 one (RFC 2965 section 9.1).  RFC 2109 cookies are bundled in
    # with the Netscape cookies for this purpose.
    if rfc2965:
        claimed = {(c.domain, c.path, c.name) for c in cookies}
        ns_cookies = [
            c for c in ns_cookies
            if (c.domain, c.path, c.name) not in claimed
        ]

    cookies.extend(ns_cookies)
    return cookies
1658
def set_cookie_if_ok(self, cookie, request):
    """Store *cookie* in the jar, but only when the policy accepts it."""
    with self._cookies_lock:
        # keep the jar's and the policy's idea of the current time in step
        current = int(time.time())
        self._policy._now = current
        self._now = current

        if not self._policy.set_ok(cookie, request):
            return
        self.set_cookie(cookie)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001671
def set_cookie(self, cookie):
    """Store *cookie* unconditionally, bypassing every policy check."""
    by_domain = self._cookies
    with self._cookies_lock:
        # cookies live in a domain -> path -> name nesting of dicts
        by_path = by_domain.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
1684
def extract_cookies(self, response, request):
    """Store each cookie found in *response* that the policy accepts."""
    _debug("extract_cookies: %s", response.info())
    with self._cookies_lock:
        for cookie in self.make_cookies(response, request):
            if not self._policy.set_ok(cookie, request):
                continue
            _debug(" setting cookie: %s", cookie)
            self.set_cookie(cookie)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001696
def clear(self, domain=None, path=None, name=None):
    """Remove cookies from the jar.

    With no arguments every cookie is removed.  One argument removes all
    cookies of that domain; two arguments remove the cookies stored under
    that path within the domain; three arguments remove the single cookie
    identified by domain, path and name.

    Raises ValueError if a narrower selector is given without the broader
    ones it depends on, and KeyError if no matching cookie exists.

    """
    if domain is None and path is None and name is None:
        self._cookies = {}
        return

    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    del self._cookies[domain]
1723
def clear_session_cookies(self):
    """Remove every cookie whose ``discard`` flag is set.

    Note that the .save() method won't save session cookies anyway, unless
    you ask otherwise by passing a true ignore_discard argument.

    """
    with self._cookies_lock:
        for cookie in self:
            if not cookie.discard:
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001738
def clear_expired_cookies(self):
    """Remove every cookie that reports itself as expired right now.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    with self._cookies_lock:
        # one clock reading is shared by all the expiry checks below
        reference_time = time.time()
        for cookie in self:
            if not cookie.is_expired(reference_time):
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001757
def __iter__(self):
    """Return what ``deepvalues`` yields for the jar's nested cookie store."""
    stored = self._cookies
    return deepvalues(stored)
1760
def __len__(self):
    """Return number of contained cookies."""
    return sum(1 for _ in self)
1766
def __repr__(self):
    """Return ``<ClassName[...]>`` listing the repr() of each cookie."""
    listing = ", ".join(repr(cookie) for cookie in self)
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001771
def __str__(self):
    """Return ``<ClassName[...]>`` listing the str() of each cookie."""
    listing = ", ".join(str(cookie) for cookie in self)
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001776
1777
# derives from OSError for backwards-compatibility with Python 2.4.0
class LoadError(OSError):
    """Raised when a cookies file cannot be loaded."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001780
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename may be a str or any os.PathLike object; it is normalised
        with os.fspath() and becomes the default for save/load/revert.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            filename = os.fspath(filename)
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here; this base class always raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Falls back to self.filename when *filename* is None and raises
        ValueError when neither is available.  The open file is handed to
        self._really_load(), which does the format-specific parsing.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.  The previous
        cookies are likewise restored if loading fails with any other
        exception, which is then re-raised unchanged.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with self._cookies_lock:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except BaseException:
                # Roll back on *any* failure, not only OSError: an error
                # escaping load() (e.g. a decoding error) must not leave
                # the jar emptied or half-loaded.
                self._cookies = old_state
                raise
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001834
Georg Brandl7c9b61b2008-05-26 17:56:51 +00001835
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    The result is a single header-words string: the cookie's name/value
    pair, then path and domain, then whichever optional fields apply
    (port, path_spec, port_spec, domain_dot, secure, expires, discard,
    comment, commenturl), then the cookie's non-standard attributes sorted
    by key, and finally the version.

    """
    h = [(cookie.name, cookie.value),
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None: h.append(("port", cookie.port))
    if cookie.path_specified: h.append(("path_spec", None))
    if cookie.port_specified: h.append(("port_spec", None))
    if cookie.domain_initial_dot: h.append(("domain_dot", None))
    if cookie.secure: h.append(("secure", None))
    # Compare against None rather than testing truthiness: an expiry of 0
    # (the epoch) is a real timestamp and must not be dropped, otherwise
    # the cookie would be written out without any expiry at all.
    if cookie.expires is not None:
        h.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard: h.append(("discard", None))
    if cookie.comment: h.append(("comment", cookie.comment))
    if cookie.comment_url: h.append(("commenturl", cookie.comment_url))

    # non-standard attributes, in a stable (sorted) order
    for k in sorted(cookie._rest):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
1863
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # the extra empty item makes the result end with a newline
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to *filename* (default: self.filename) in LWP format.

        Raises ValueError when no filename is available.  A newly created
        file is only readable and writable by its owner.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # Cookie files hold credentials, so do not rely on the process umask:
        # create the file with owner-only permissions.  (The mode only takes
        # effect when the file is created, not when an existing one is
        # truncated.)
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse "Set-Cookie3:" lines from open file *f* into the jar.

        Raises LoadError when the first line does not match self.magic_re
        or when a line cannot be parsed; OSError is passed through as is.

        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Pre-bind so the error handler below can always report a line, even
        # if the very first read inside the loop is what fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "": break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            if v is None: v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        # no usable expiry: treat it as a session cookie
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1982
1983
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse a Netscape-format cookies file from open file *f*.

        Raises LoadError when the first line does not match
        NETSCAPE_MAGIC_RGX or when a line cannot be parsed; OSError is
        passed through as is.

        """
        now = time.time()

        if not NETSCAPE_MAGIC_RGX.match(f.readline()):
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Pre-bind so the error handler below can always report a line, even
        # if the very first read inside the loop is what fails.
        line = ""
        try:
            while True:
                line = f.readline()
                rest = {}

                if line == "": break

                # httponly is a cookie flag as defined in rfc6265
                # when encoded in a netscape cookie file,
                # the line is prepended with "#HttpOnly_"
                if line.startswith(HTTPONLY_PREFIX):
                    rest[HTTPONLY_ATTR] = ""
                    line = line[len(HTTPONLY_PREFIX):]

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith(("#", "$")) or
                    line.strip() == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # NOTE(review): this consistency check is an assert, so it
                # is skipped entirely when Python runs with -O; otherwise a
                # failure is reported as LoadError by the handler below.
                assert domain_specified == initial_dot

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           rest)
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to *filename* (default: self.filename) as cookies.txt.

        Raises ValueError when no filename is available.  A newly created
        file is only readable and writable by its owner.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # Cookie files hold credentials, so do not rely on the process umask:
        # create the file with owner-only permissions.  (The mode only takes
        # effect when the file is created, not when an existing one is
        # truncated.)
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(NETSCAPE_HEADER_TEXT)
            now = time.time()
            for cookie in self:
                domain = cookie.domain
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # the HttpOnly marker is stored as a prefix on the domain
                # field; initial_dot was computed from the bare domain above
                if cookie.has_nonstandard_attr(HTTPONLY_ATTR):
                    domain = HTTPONLY_PREFIX + domain
                f.write(
                    "\t".join([domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")