r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""

# Names exported by ``from <this module> import *``.
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']

import copy
import datetime
import http.client  # only for the default HTTP port
import os
import re
import threading as _threading
import time
import urllib.parse
import urllib.request
from calendar import timegm

debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() on first use while debug is true


def _debug(*args):
    """Pass *args* to the module logger's ``debug()`` when debugging is enabled.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  The "http.cookiejar" logger is looked up on the first call made
    with the flag set and cached in the module-level ``logger``.
    """
    if not debug:
        return
    global logger
    if not logger:
        # Imported here so that logging is only loaded when debugging is on.
        import logging
        logger = logging.getLogger("http.cookiejar")
    return logger.debug(*args)


# The default HTTP port, kept as a string.
DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")

def _warn_unhandled_exception():
    """Issue a warning containing the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  Warn if any exceptions
    are caught there.  Must be called while an exception is being handled.
    """
    import traceback
    import warnings
    msg = traceback.format_exc()
    # stacklevel=2 attributes the warning to the caller (the except: block).
    warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)


# Date/time conversion
# -----------------------------------------------------------------------------

EPOCH_YEAR = 1970


def _timegm(tt):
    """Return seconds since the epoch for UTC time tuple *tt*, or None.

    Only the first six items of *tt* (year, month, day, hour, minute,
    second) are range-checked; None is returned if any of them falls
    outside the accepted range, including years before EPOCH_YEAR.
    """
    year, month, mday, hour, minute, sec = tt[:6]
    in_range = (
        (year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
        (0 <= hour <= 24) and (0 <= minute <= 59) and (0 <= sec <= 61))
    if not in_range:
        return None
    return timegm(tt)

# Abbreviated names; DAYS is indexed by datetime.weekday() (Monday == 0) and
# MONTHS by month number minus one.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy used for case-insensitive month-name lookups.
MONTHS_LOWER = [month.lower() for month in MONTHS]

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Timezone-aware calls replace the deprecated utcnow()/utcfromtimestamp();
    # the broken-down fields read below are the same UTC values.
    if t is None:
        dt = datetime.datetime.now(datetime.timezone.utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, datetime.timezone.utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Timezone-aware calls replace the deprecated utcnow()/utcfromtimestamp();
    # the broken-down fields read below are the same UTC values.
    if t is None:
        dt = datetime.datetime.now(datetime.timezone.utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, datetime.timezone.utc)
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)


# Timezone names treated as zero offset; only the keys are used.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Optional sign, one or two hour digits, optional colon, optional two
# minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)


def offset_from_tz_string(tz):
    """Return the UTC offset in seconds described by *tz*, or None.

    *tz* is either one of the names in UTC_ZONES (offset 0) or a numeric
    zone such as "-0800", "+01:30" or "5".  None is returned for anything
    else.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    sign, hours, minutes = m.groups()
    offset = 3600 * int(hours)
    if minutes:
        offset += 60 * int(minutes)
    if sign == '-':
        offset = -offset
    return offset

def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date components to seconds since the epoch, or None.

    *day* and *yr* are digit strings; *mon* is a month name or number;
    *hr*, *min* and *sec* may be None (treated as 0); *tz* is a timezone
    string or None (treated as UTC).  None is returned when the year exceeds
    datetime.MAXYEAR, the month is not recognised, the fields are out of
    range for _timegm(), or the timezone string is not understood.
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE lets the seconds carry a fractional part ("29.5"), which
    # int() alone rejects with ValueError; drop the fraction instead.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the year within
        # 50 years of the current one
        cur_yr = time.localtime(time.time())[0]
        yr_in_century = cur_yr % 100
        diff = yr_in_century - yr
        yr = yr + cur_yr - yr_in_century
        if abs(diff) > 50:
            if diff > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t

STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    (?:
       ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone
       \s*
    )?
    (?:
       \(\w+\)         # ASCII representation of timezone in parens.
       \s*
    )?$""", re.X | re.ASCII)


def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # The strict pattern only checks the *shape* of the month
            # ("Foo" matches); an unknown name is an unrecognized date,
            # which this function reports as None rather than raising.
            return None
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, count=1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)

ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   (?:
      ([-+]?\d\d?:?(:?\d\d)?
       |Z|z)             # timezone  (Z is "zero meridian", i.e. GMT)
      \s*
   )?$""", re.X | re.ASCII)


def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)


# Header parsing
# -----------------------------------------------------------------------------

def unmatched(match):
    """Return unmatched part of re.Match object."""
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]


HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")


def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        # Each pass consumes a token (with optional value), a comma, or a
        # run of junk from the front of text, so the loop always advances.
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result

HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")


def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                # anything that is not purely word characters gets quoted
                if not re.search(r"^\w+$", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        # empty pair lists contribute nothing to the result
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)

def strip_quotes(text):
    """Return *text* without one leading and one trailing double quote.

    The two ends are handled independently, so an unbalanced quote is
    removed as well.
    """
    if text.startswith('"'):
        text = text[1:]
    if text.endswith('"'):
        text = text[:-1]
    return text

def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns one list of (key, value) pairs per header that yielded any
    pairs.  A ("version", "0") pair is appended when the header carried no
    version cookie-attribute.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False

        # XXX: The following does not strictly adhere to RFCs in that empty
        # names and values are legal (the former will only appear once and will
        # be overwritten if multiple occurrences are present). This is
        # mostly to deal with backwards compatibility.
        for ii, param in enumerate(ns_header.split(';')):
            param = param.strip()

            key, sep, val = param.partition('=')
            key = key.strip()

            if not key:
                if ii == 0:
                    # the cookie itself has no name: ignore the whole header
                    break
                else:
                    continue

            # allow for a distinction between present and empty and missing
            # altogether
            val = val.strip() if sep else None

            # the first parameter is the cookie's own name=value and is kept
            # verbatim; only later parameters are cookie-attributes
            if ii != 0:
                lc = key.lower()
                if lc in known_attrs:
                    key = lc

                if key == "version":
                    # This is an RFC 2109 cookie.
                    if val is not None:
                        val = strip_quotes(val)
                    version_set = True
                elif key == "expires":
                    # convert expires date to seconds since epoch
                    if val is not None:
                        val = http2time(strip_quotes(val))  # None if invalid
            pairs.append((key, val))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result


IPV4_RE = re.compile(r"\.\d+$", re.ASCII)


def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True


def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B must be a *suffix* of A.  Merely finding B
    # somewhere inside A is not enough: www.acme.com.evil.net must not
    # domain-match .acme.com.  N is non-empty here because A != B.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True

def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.

    """
    # Only text matching IPV4_RE (ending in a dot followed by digits) is
    # rejected.
    if IPV4_RE.search(text):
        return False
    return True

def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    The comparison is case-insensitive.  If B starts with a dot, A matches
    when it ends with B; otherwise A must equal B.  If either argument is
    not accepted by liberal_is_HDN, only exact equality matches.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # true only for equal IP addresses
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B

cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urllib.parse.urlparse(url)[1]
    if host == "":
        # no network location in the URL: fall back to the Host header
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, count=1)
    return host.lower()

def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    """
    erhn = req_host = request_host(request)
    # a dot-free host that does not match IPV4_RE gets ".local" appended
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn

def request_path(request):
    """Path component of request-URI, as defined by RFC 2965."""
    url = request.get_full_url()
    parts = urllib.parse.urlsplit(url)
    path = escape_path(parts.path)
    if not path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        path = "/" + path
    return path

def request_port(request):
    """Return the port of *request* as a string, or None if it is not numeric.

    The port is whatever follows the first ":" in ``request.host``;
    DEFAULT_HTTP_PORT is returned when the host contains no ":".
    """
    host = request.host
    i = host.find(':')
    if i >= 0:
        port = host[i+1:]
        try:
            int(port)
        except ValueError:
            _debug("nonnumeric port: '%s'", port)
            return None
    else:
        port = DEFAULT_HTTP_PORT
    return port

# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Return the %XX escape matched by ESCAPED_CHAR_RE with upper-case hex."""
    return "%%%s" % match.group(1).upper()


def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    path = urllib.parse.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path

def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: a is the leading label, b the remainder.
    _a, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h

def is_third_party(request):
    """Return True if *request* is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    return not domain_match(req_host, reach(request.origin_req_host))


class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted to int when not None, domain is
        lower-cased, and rest (the nonstandard cookie-attributes) is
        shallow-copied.  Raises ValueError if port is None while
        port_specified is True.
        """
        if version is not None: version = int(version)
        # going through float() accepts values such as "100.9"
        if expires is not None: expires = int(float(expires))
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # copied so that later set_nonstandard_attr() calls do not modify
        # the caller's mapping
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the nonstandard cookie-attribute *name* is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the nonstandard cookie-attribute *name*, or *default*."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the nonstandard cookie-attribute *name* to *value*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= *now*.

        *now* defaults to the current time.  A cookie whose expires is None
        is never reported as expired.
        """
        if now is None: now = time.time()
        if (self.expires is not None) and (self.expires <= now):
            return True
        return False

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(args))


class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customized policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True


867class DefaultCookiePolicy(CookiePolicy):
868 """Implements the standard rules for accepting and returning cookies."""
869
870 DomainStrictNoDots = 1
871 DomainStrictNonDomain = 2
872 DomainRFC2965Match = 4
873
874 DomainLiberal = 0
875 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
876
877 def __init__(self,
878 blocked_domains=None, allowed_domains=None,
879 netscape=True, rfc2965=False,
Neal Norwitz71dad722005-12-23 21:43:48 +0000880 rfc2109_as_netscape=None,
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000881 hide_cookie2=False,
882 strict_domain=False,
883 strict_rfc2965_unverifiable=True,
884 strict_ns_unverifiable=False,
885 strict_ns_domain=DomainLiberal,
886 strict_ns_set_initial_dollar=False,
887 strict_ns_set_path=False,
Paul Bailey4c339972018-10-08 13:49:29 -0500888 secure_protocols=("https", "wss")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000889 ):
890 """Constructor arguments should be passed as keyword arguments only."""
891 self.netscape = netscape
892 self.rfc2965 = rfc2965
Neal Norwitz71dad722005-12-23 21:43:48 +0000893 self.rfc2109_as_netscape = rfc2109_as_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000894 self.hide_cookie2 = hide_cookie2
895 self.strict_domain = strict_domain
896 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
897 self.strict_ns_unverifiable = strict_ns_unverifiable
898 self.strict_ns_domain = strict_ns_domain
899 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
900 self.strict_ns_set_path = strict_ns_set_path
Paul Bailey4c339972018-10-08 13:49:29 -0500901 self.secure_protocols = secure_protocols
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000902
903 if blocked_domains is not None:
904 self._blocked_domains = tuple(blocked_domains)
905 else:
906 self._blocked_domains = ()
907
908 if allowed_domains is not None:
909 allowed_domains = tuple(allowed_domains)
910 self._allowed_domains = allowed_domains
911
912 def blocked_domains(self):
913 """Return the sequence of blocked domains (as a tuple)."""
914 return self._blocked_domains
915 def set_blocked_domains(self, blocked_domains):
916 """Set the sequence of blocked domains."""
917 self._blocked_domains = tuple(blocked_domains)
918
919 def is_blocked(self, domain):
920 for blocked_domain in self._blocked_domains:
921 if user_domain_match(domain, blocked_domain):
922 return True
923 return False
924
925 def allowed_domains(self):
926 """Return None, or the sequence of allowed domains (as a tuple)."""
927 return self._allowed_domains
928 def set_allowed_domains(self, allowed_domains):
929 """Set the sequence of allowed domains, or None."""
930 if allowed_domains is not None:
931 allowed_domains = tuple(allowed_domains)
932 self._allowed_domains = allowed_domains
933
934 def is_not_allowed(self, domain):
935 if self._allowed_domains is None:
936 return False
937 for allowed_domain in self._allowed_domains:
938 if user_domain_match(domain, allowed_domain):
939 return False
940 return True
941
942 def set_ok(self, cookie, request):
943 """
944 If you override .set_ok(), be sure to call this method. If it returns
945 false, so should your subclass (assuming your subclass wants to be more
946 strict about which cookies to accept).
947
948 """
Thomas Wouters477c8d52006-05-27 19:21:47 +0000949 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000950
951 assert cookie.name is not None
952
953 for n in "version", "verifiability", "name", "path", "domain", "port":
954 fn_name = "set_ok_"+n
955 fn = getattr(self, fn_name)
956 if not fn(cookie, request):
957 return False
958
959 return True
960
961 def set_ok_version(self, cookie, request):
962 if cookie.version is None:
963 # Version is always set to 0 by parse_ns_headers if it's a Netscape
964 # cookie, so this must be an invalid RFC 2965 cookie.
Thomas Wouters477c8d52006-05-27 19:21:47 +0000965 _debug(" Set-Cookie2 without version attribute (%s=%s)",
966 cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000967 return False
968 if cookie.version > 0 and not self.rfc2965:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000969 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000970 return False
971 elif cookie.version == 0 and not self.netscape:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000972 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000973 return False
974 return True
975
976 def set_ok_verifiability(self, cookie, request):
Meador Ingeda1ffbc2012-07-20 19:12:04 -0500977 if request.unverifiable and is_third_party(request):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000978 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000979 _debug(" third-party RFC 2965 cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000980 "unverifiable transaction")
981 return False
982 elif cookie.version == 0 and self.strict_ns_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000983 _debug(" third-party Netscape cookie during "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000984 "unverifiable transaction")
985 return False
986 return True
987
988 def set_ok_name(self, cookie, request):
989 # Try and stop servers setting V0 cookies designed to hack other
990 # servers that know both V0 and V1 protocols.
991 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
992 cookie.name.startswith("$")):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000993 _debug(" illegal name (starts with '$'): '%s'", cookie.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000994 return False
995 return True
996
997 def set_ok_path(self, cookie, request):
998 if cookie.path_specified:
999 req_path = request_path(request)
1000 if ((cookie.version > 0 or
1001 (cookie.version == 0 and self.strict_ns_set_path)) and
Xtreak0e1f1f02019-03-10 22:42:28 +05301002 not self.path_return_ok(cookie.path, request)):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001003 _debug(" path attribute %s is not a prefix of request "
1004 "path %s", cookie.path, req_path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001005 return False
1006 return True
1007
1008 def set_ok_domain(self, cookie, request):
1009 if self.is_blocked(cookie.domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001010 _debug(" domain %s is in user block-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001011 return False
1012 if self.is_not_allowed(cookie.domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001013 _debug(" domain %s is not in user allow-list", cookie.domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001014 return False
1015 if cookie.domain_specified:
1016 req_host, erhn = eff_request_host(request)
1017 domain = cookie.domain
1018 if self.strict_domain and (domain.count(".") >= 2):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001019 # XXX This should probably be compared with the Konqueror
1020 # (kcookiejar.cpp) and Mozilla implementations, but it's a
1021 # losing battle.
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001022 i = domain.rfind(".")
1023 j = domain.rfind(".", 0, i)
1024 if j == 0: # domain like .foo.bar
1025 tld = domain[i+1:]
1026 sld = domain[j+1:i]
Thomas Wouters477c8d52006-05-27 19:21:47 +00001027 if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
1028 "gov", "mil", "int", "aero", "biz", "cat", "coop",
1029 "info", "jobs", "mobi", "museum", "name", "pro",
1030 "travel", "eu") and len(tld) == 2:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001031 # domain like .co.uk
Thomas Wouters477c8d52006-05-27 19:21:47 +00001032 _debug(" country-code second level domain %s", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001033 return False
1034 if domain.startswith("."):
1035 undotted_domain = domain[1:]
1036 else:
1037 undotted_domain = domain
1038 embedded_dots = (undotted_domain.find(".") >= 0)
1039 if not embedded_dots and domain != ".local":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001040 _debug(" non-local domain %s contains no embedded dot",
1041 domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001042 return False
1043 if cookie.version == 0:
1044 if (not erhn.endswith(domain) and
1045 (not erhn.startswith(".") and
1046 not ("."+erhn).endswith(domain))):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001047 _debug(" effective request-host %s (even with added "
Ezio Melottie130a522011-10-19 10:58:56 +03001048 "initial dot) does not end with %s",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001049 erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001050 return False
1051 if (cookie.version > 0 or
1052 (self.strict_ns_domain & self.DomainRFC2965Match)):
1053 if not domain_match(erhn, domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 _debug(" effective request-host %s does not domain-match "
1055 "%s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001056 return False
1057 if (cookie.version > 0 or
1058 (self.strict_ns_domain & self.DomainStrictNoDots)):
1059 host_prefix = req_host[:-len(domain)]
1060 if (host_prefix.find(".") >= 0 and
1061 not IPV4_RE.search(req_host)):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001062 _debug(" host prefix %s for domain %s contains a dot",
1063 host_prefix, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001064 return False
1065 return True
1066
1067 def set_ok_port(self, cookie, request):
1068 if cookie.port_specified:
1069 req_port = request_port(request)
1070 if req_port is None:
1071 req_port = "80"
1072 else:
1073 req_port = str(req_port)
1074 for p in cookie.port.split(","):
1075 try:
1076 int(p)
1077 except ValueError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001078 _debug(" bad port %s (not numeric)", p)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001079 return False
1080 if p == req_port:
1081 break
1082 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083 _debug(" request port (%s) not found in %s",
1084 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001085 return False
1086 return True
1087
1088 def return_ok(self, cookie, request):
1089 """
1090 If you override .return_ok(), be sure to call this method. If it
1091 returns false, so should your subclass (assuming your subclass wants to
1092 be more strict about which cookies to return).
1093
1094 """
1095 # Path has already been checked by .path_return_ok(), and domain
1096 # blocking done by .domain_return_ok().
Thomas Wouters477c8d52006-05-27 19:21:47 +00001097 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001098
1099 for n in "version", "verifiability", "secure", "expires", "port", "domain":
1100 fn_name = "return_ok_"+n
1101 fn = getattr(self, fn_name)
1102 if not fn(cookie, request):
1103 return False
1104 return True
1105
1106 def return_ok_version(self, cookie, request):
1107 if cookie.version > 0 and not self.rfc2965:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001108 _debug(" RFC 2965 cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001109 return False
1110 elif cookie.version == 0 and not self.netscape:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001111 _debug(" Netscape cookies are switched off")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001112 return False
1113 return True
1114
1115 def return_ok_verifiability(self, cookie, request):
Meador Ingeda1ffbc2012-07-20 19:12:04 -05001116 if request.unverifiable and is_third_party(request):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001117 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001118 _debug(" third-party RFC 2965 cookie during unverifiable "
1119 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001120 return False
1121 elif cookie.version == 0 and self.strict_ns_unverifiable:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001122 _debug(" third-party Netscape cookie during unverifiable "
1123 "transaction")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001124 return False
1125 return True
1126
1127 def return_ok_secure(self, cookie, request):
Paul Bailey4c339972018-10-08 13:49:29 -05001128 if cookie.secure and request.type not in self.secure_protocols:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001129 _debug(" secure cookie with non-secure request")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001130 return False
1131 return True
1132
1133 def return_ok_expires(self, cookie, request):
1134 if cookie.is_expired(self._now):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001135 _debug(" cookie expired")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001136 return False
1137 return True
1138
1139 def return_ok_port(self, cookie, request):
1140 if cookie.port:
1141 req_port = request_port(request)
1142 if req_port is None:
1143 req_port = "80"
1144 for p in cookie.port.split(","):
1145 if p == req_port:
1146 break
1147 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001148 _debug(" request port %s does not match cookie port %s",
1149 req_port, cookie.port)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001150 return False
1151 return True
1152
1153 def return_ok_domain(self, cookie, request):
1154 req_host, erhn = eff_request_host(request)
1155 domain = cookie.domain
1156
Xtreakca7fe502019-03-10 07:39:48 +05301157 if domain and not domain.startswith("."):
1158 dotdomain = "." + domain
1159 else:
1160 dotdomain = domain
1161
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001162 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1163 if (cookie.version == 0 and
1164 (self.strict_ns_domain & self.DomainStrictNonDomain) and
1165 not cookie.domain_specified and domain != erhn):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001166 _debug(" cookie with unspecified domain does not string-compare "
1167 "equal to request domain")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001168 return False
1169
1170 if cookie.version > 0 and not domain_match(erhn, domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001171 _debug(" effective request-host name %s does not domain-match "
1172 "RFC 2965 cookie domain %s", erhn, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001173 return False
Xtreakca7fe502019-03-10 07:39:48 +05301174 if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001175 _debug(" request-host %s does not match Netscape cookie domain "
1176 "%s", req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001177 return False
1178 return True
1179
1180 def domain_return_ok(self, domain, request):
1181 # Liberal check of. This is here as an optimization to avoid
1182 # having to load lots of MSIE cookie files unless necessary.
1183 req_host, erhn = eff_request_host(request)
1184 if not req_host.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001185 req_host = "."+req_host
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001186 if not erhn.startswith("."):
Raymond Hettingerbab41432005-02-05 01:31:19 +00001187 erhn = "."+erhn
Xtreakca7fe502019-03-10 07:39:48 +05301188 if domain and not domain.startswith("."):
1189 dotdomain = "." + domain
1190 else:
1191 dotdomain = domain
1192 if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001193 #_debug(" request domain %s does not match cookie domain %s",
1194 # req_host, domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001195 return False
1196
1197 if self.is_blocked(domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001198 _debug(" domain %s is in user block-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001199 return False
1200 if self.is_not_allowed(domain):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001201 _debug(" domain %s is not in user allow-list", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001202 return False
1203
1204 return True
1205
1206 def path_return_ok(self, path, request):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001207 _debug("- checking cookie path=%s", path)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001208 req_path = request_path(request)
Xtreak0e1f1f02019-03-10 22:42:28 +05301209 pathlen = len(path)
1210 if req_path == path:
1211 return True
1212 elif (req_path.startswith(path) and
1213 (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
1214 return True
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001215
Xtreak0e1f1f02019-03-10 22:42:28 +05301216 _debug(" %s does not path-match %s", req_path, path)
1217 return False
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001218
def vals_sorted_by_key(adict):
    """Return a lazy iterator over adict's values, ordered by sorted key."""
    return map(adict.get, sorted(adict.keys()))
1222
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an ``items`` attribute is treated as a nested mapping
    and descended into; every other value is yielded as a leaf.
    """
    # The parameter is no longer rebound to a boolean flag inside the loop.
    for key in sorted(mapping.keys()):
        obj = mapping.get(key)
        if hasattr(obj, "items"):
            yield from deepvalues(obj)
        else:
            yield obj
1237
1238
# Sentinel passed as the second argument to dict.get(), so that a key that is
# missing can be told apart from a key that is present with a None value.
class Absent:
    """Marker class meaning "this key was not present in the dict"."""
1242
1243class CookieJar:
1244 """Collection of HTTP cookies.
1245
1246 You may not need to know about this class: try
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001247 urllib.request.build_opener(HTTPCookieProcessor).open(url).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001248 """
1249
    # Any non-word character; _cookie_attrs() uses it to decide whether a
    # version > 0 cookie value needs escaping.
    non_word_re = re.compile(r"\W")
    # A double quote or backslash; _cookie_attrs() backslash-escapes each match.
    quote_re = re.compile(r"([\"\\])")
    # An optional leading dot followed by a run of non-dot characters.
    strict_domain_re = re.compile(r"\.?[^.]*")
    # A run of non-dot characters.
    domain_re = re.compile(r"[^.]*")
    # One or more dots at the start of the string.
    dots_re = re.compile(r"^\.+")

    # A "#LWP-Cookies-<major>.<minor>" header at the start of the string,
    # capturing the version number (ASCII digits only).
    magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001257
1258 def __init__(self, policy=None):
1259 if policy is None:
1260 policy = DefaultCookiePolicy()
1261 self._policy = policy
1262
1263 self._cookies_lock = _threading.RLock()
1264 self._cookies = {}
1265
1266 def set_policy(self, policy):
1267 self._policy = policy
1268
1269 def _cookies_for_domain(self, domain, request):
1270 cookies = []
1271 if not self._policy.domain_return_ok(domain, request):
1272 return []
Thomas Wouters477c8d52006-05-27 19:21:47 +00001273 _debug("Checking %s for cookies to return", domain)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001274 cookies_by_path = self._cookies[domain]
1275 for path in cookies_by_path.keys():
1276 if not self._policy.path_return_ok(path, request):
1277 continue
1278 cookies_by_name = cookies_by_path[path]
1279 for cookie in cookies_by_name.values():
1280 if not self._policy.return_ok(cookie, request):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001281 _debug(" not returning cookie")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001282 continue
Thomas Wouters477c8d52006-05-27 19:21:47 +00001283 _debug(" it's a match")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001284 cookies.append(cookie)
1285 return cookies
1286
1287 def _cookies_for_request(self, request):
1288 """Return a list of cookies to be returned to server."""
1289 cookies = []
1290 for domain in self._cookies.keys():
1291 cookies.extend(self._cookies_for_domain(domain, request))
1292 return cookies
1293
1294 def _cookie_attrs(self, cookies):
1295 """Return a list of cookie-attributes to be returned to server.
1296
1297 like ['foo="bar"; $Path="/"', ...]
1298
1299 The $Version attribute is also added when appropriate (currently only
1300 once per request).
1301
1302 """
1303 # add cookies in order of most specific (ie. longest) path first
Raymond Hettinger70b64fc2008-01-30 20:15:17 +00001304 cookies.sort(key=lambda a: len(a.path), reverse=True)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001305
1306 version_set = False
1307
1308 attrs = []
1309 for cookie in cookies:
1310 # set version of Cookie header
1311 # XXX
1312 # What should it be if multiple matching Set-Cookie headers have
1313 # different versions themselves?
1314 # Answer: there is no answer; was supposed to be settled by
1315 # RFC 2965 errata, but that may never appear...
1316 version = cookie.version
1317 if not version_set:
1318 version_set = True
1319 if version > 0:
1320 attrs.append("$Version=%s" % version)
1321
1322 # quote cookie value if necessary
1323 # (not for Netscape protocol, which already has any quotes
1324 # intact, due to the poorly-specified Netscape Cookie: syntax)
1325 if ((cookie.value is not None) and
1326 self.non_word_re.search(cookie.value) and version > 0):
1327 value = self.quote_re.sub(r"\\\1", cookie.value)
1328 else:
1329 value = cookie.value
1330
1331 # add cookie-attributes to be returned in Cookie header
1332 if cookie.value is None:
1333 attrs.append(cookie.name)
1334 else:
1335 attrs.append("%s=%s" % (cookie.name, value))
1336 if version > 0:
1337 if cookie.path_specified:
1338 attrs.append('$Path="%s"' % cookie.path)
1339 if cookie.domain.startswith("."):
1340 domain = cookie.domain
1341 if (not cookie.domain_initial_dot and
1342 domain.startswith(".")):
1343 domain = domain[1:]
1344 attrs.append('$Domain="%s"' % domain)
1345 if cookie.port is not None:
1346 p = "$Port"
1347 if cookie.port_specified:
1348 p = p + ('="%s"' % cookie.port)
1349 attrs.append(p)
1350
1351 return attrs
1352
1353 def add_cookie_header(self, request):
Georg Brandl029986a2008-06-23 11:44:14 +00001354 """Add correct Cookie: header to request (urllib.request.Request object).
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001355
1356 The Cookie2 header is also added unless policy.hide_cookie2 is true.
1357
1358 """
Thomas Wouters477c8d52006-05-27 19:21:47 +00001359 _debug("add_cookie_header")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001360 self._cookies_lock.acquire()
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001361 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001362
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001363 self._policy._now = self._now = int(time.time())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001364
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001365 cookies = self._cookies_for_request(request)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001366
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001367 attrs = self._cookie_attrs(cookies)
1368 if attrs:
1369 if not request.has_header("Cookie"):
1370 request.add_unredirected_header(
1371 "Cookie", "; ".join(attrs))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001372
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001373 # if necessary, advertise that we know RFC 2965
1374 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1375 not request.has_header("Cookie2")):
1376 for cookie in cookies:
1377 if cookie.version != 1:
1378 request.add_unredirected_header("Cookie2", '$Version="1"')
1379 break
1380
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001381 finally:
Thomas Wouters9fe394c2007-02-05 01:24:16 +00001382 self._cookies_lock.release()
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001383
1384 self.clear_expired_cookies()
1385
1386 def _normalized_cookie_tuples(self, attrs_set):
1387 """Return list of tuples containing normalised cookie information.
1388
1389 attrs_set is the list of lists of key,value pairs extracted from
1390 the Set-Cookie or Set-Cookie2 headers.
1391
1392 Tuples are name, value, standard, rest, where name and value are the
1393 cookie name and value, standard is a dictionary containing the standard
1394 cookie-attributes (discard, secure, version, expires or max-age,
1395 domain, path and port) and rest is a dictionary containing the rest of
1396 the cookie-attributes.
1397
1398 """
1399 cookie_tuples = []
1400
1401 boolean_attrs = "discard", "secure"
1402 value_attrs = ("version",
1403 "expires", "max-age",
1404 "domain", "path", "port",
1405 "comment", "commenturl")
1406
1407 for cookie_attrs in attrs_set:
1408 name, value = cookie_attrs[0]
1409
1410 # Build dictionary of standard cookie-attributes (standard) and
1411 # dictionary of other cookie-attributes (rest).
1412
1413 # Note: expiry time is normalised to seconds since epoch. V0
1414 # cookies should have the Expires cookie-attribute, and V1 cookies
1415 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1416 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1417 # accept either (but prefer Max-Age).
1418 max_age_set = False
1419
1420 bad_cookie = False
1421
1422 standard = {}
1423 rest = {}
1424 for k, v in cookie_attrs[1:]:
1425 lc = k.lower()
1426 # don't lose case distinction for unknown fields
1427 if lc in value_attrs or lc in boolean_attrs:
1428 k = lc
1429 if k in boolean_attrs and v is None:
1430 # boolean cookie-attribute is present, but has no value
1431 # (like "discard", rather than "port=80")
1432 v = True
1433 if k in standard:
1434 # only first value is significant
1435 continue
1436 if k == "domain":
1437 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001438 _debug(" missing value for domain attribute")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001439 bad_cookie = True
1440 break
1441 # RFC 2965 section 3.3.3
1442 v = v.lower()
1443 if k == "expires":
1444 if max_age_set:
1445 # Prefer max-age to expires (like Mozilla)
1446 continue
1447 if v is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001448 _debug(" missing or invalid value for expires "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001449 "attribute: treating as session cookie")
1450 continue
1451 if k == "max-age":
1452 max_age_set = True
1453 try:
1454 v = int(v)
1455 except ValueError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001456 _debug(" missing or invalid (non-numeric) value for "
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001457 "max-age attribute")
1458 bad_cookie = True
1459 break
1460 # convert RFC 2965 Max-Age to seconds since epoch
1461 # XXX Strictly you're supposed to follow RFC 2616
Serhiy Storchakad65c9492015-11-02 14:10:23 +02001462 # age-calculation rules. Remember that zero Max-Age
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001463 # is a request to discard (old and new) cookie, though.
1464 k = "expires"
1465 v = self._now + v
1466 if (k in value_attrs) or (k in boolean_attrs):
1467 if (v is None and
Raymond Hettingerdbecd932005-02-06 06:57:08 +00001468 k not in ("port", "comment", "commenturl")):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001469 _debug(" missing value for %s attribute" % k)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001470 bad_cookie = True
1471 break
1472 standard[k] = v
1473 else:
1474 rest[k] = v
1475
1476 if bad_cookie:
1477 continue
1478
1479 cookie_tuples.append((name, value, standard, rest))
1480
1481 return cookie_tuples
1482
1483 def _cookie_from_cookie_tuple(self, tup, request):
1484 # standard is dict of standard cookie-attributes, rest is dict of the
1485 # rest of them
1486 name, value, standard, rest = tup
1487
1488 domain = standard.get("domain", Absent)
1489 path = standard.get("path", Absent)
1490 port = standard.get("port", Absent)
1491 expires = standard.get("expires", Absent)
1492
1493 # set the easy defaults
1494 version = standard.get("version", None)
Benjamin Peterson3e5cd1d2010-06-27 21:45:24 +00001495 if version is not None:
1496 try:
1497 version = int(version)
1498 except ValueError:
1499 return None # invalid version, ignore cookie
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001500 secure = standard.get("secure", False)
1501 # (discard is also set if expires is Absent)
1502 discard = standard.get("discard", False)
1503 comment = standard.get("comment", None)
1504 comment_url = standard.get("commenturl", None)
1505
1506 # set default path
1507 if path is not Absent and path != "":
1508 path_specified = True
1509 path = escape_path(path)
1510 else:
1511 path_specified = False
1512 path = request_path(request)
1513 i = path.rfind("/")
1514 if i != -1:
1515 if version == 0:
1516 # Netscape spec parts company from reality here
1517 path = path[:i]
1518 else:
1519 path = path[:i+1]
1520 if len(path) == 0: path = "/"
1521
1522 # set default domain
1523 domain_specified = domain is not Absent
1524 # but first we have to remember whether it starts with a dot
1525 domain_initial_dot = False
1526 if domain_specified:
1527 domain_initial_dot = bool(domain.startswith("."))
1528 if domain is Absent:
1529 req_host, erhn = eff_request_host(request)
1530 domain = erhn
1531 elif not domain.startswith("."):
1532 domain = "."+domain
1533
1534 # set default port
1535 port_specified = False
1536 if port is not Absent:
1537 if port is None:
1538 # Port attr present, but has no value: default to request port.
1539 # Cookie should then only be sent back on that port.
1540 port = request_port(request)
1541 else:
1542 port_specified = True
1543 port = re.sub(r"\s+", "", port)
1544 else:
1545 # No port attr present. Cookie can be sent back on any port.
1546 port = None
1547
1548 # set default expires and discard
1549 if expires is Absent:
1550 expires = None
1551 discard = True
1552 elif expires <= self._now:
1553 # Expiry date in past is request to delete cookie. This can't be
1554 # in DefaultCookiePolicy, because can't delete cookies there.
1555 try:
1556 self.clear(domain, path, name)
1557 except KeyError:
1558 pass
Thomas Wouters477c8d52006-05-27 19:21:47 +00001559 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1560 domain, path, name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001561 return None
1562
1563 return Cookie(version,
1564 name, value,
1565 port, port_specified,
1566 domain, domain_specified, domain_initial_dot,
1567 path, path_specified,
1568 secure,
1569 expires,
1570 discard,
1571 comment,
1572 comment_url,
1573 rest)
1574
1575 def _cookies_from_attrs_set(self, attrs_set, request):
1576 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1577
1578 cookies = []
1579 for tup in cookie_tuples:
1580 cookie = self._cookie_from_cookie_tuple(tup, request)
1581 if cookie: cookies.append(cookie)
1582 return cookies
1583
Neal Norwitz71dad722005-12-23 21:43:48 +00001584 def _process_rfc2109_cookies(self, cookies):
1585 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1586 if rfc2109_as_ns is None:
1587 rfc2109_as_ns = not self._policy.rfc2965
1588 for cookie in cookies:
1589 if cookie.version == 1:
1590 cookie.rfc2109 = True
Tim Peters536cf992005-12-25 23:18:31 +00001591 if rfc2109_as_ns:
Neal Norwitz71dad722005-12-23 21:43:48 +00001592 # treat 2109 cookies as Netscape cookies rather than
1593 # as RFC2965 cookies
1594 cookie.version = 0
1595
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object.

    Set-Cookie2 (RFC 2965) and Set-Cookie (Netscape / RFC 2109) headers are
    read from response.info() and parsed according to which protocols the
    policy has switched on.  If parsing one family of headers raises, the
    problem is reported through _warn_unhandled_exception() and that family
    contributes no cookies.

    The result is always a list (possibly empty).  Nothing is stored in the
    jar by this method, but the jar's and the policy's _now timestamps are
    refreshed.

    """
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
    ns_hdrs = headers.get_all("Set-Cookie", [])
    self._policy._now = self._now = int(time.time())

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    if ((not rfc2965_hdrs and not ns_hdrs) or
        (not ns_hdrs and not rfc2965) or
        (not rfc2965_hdrs and not netscape) or
        (not netscape and not rfc2965)):
        return []  # no relevant cookie headers: quick exit

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        cookies = []

    if ns_hdrs and netscape:
        try:
            # RFC 2109 and Netscape cookies
            ns_cookies = self._cookies_from_attrs_set(
                parse_ns_headers(ns_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            ns_cookies = []
        self._process_rfc2109_cookies(ns_cookies)

        # Look for Netscape cookies (from Set-Cookie headers) that match
        # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
        # For each match, keep the RFC 2965 cookie and ignore the Netscape
        # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
        # bundled in with the Netscape cookies for this purpose, which is
        # reasonable behaviour.
        if rfc2965:
            seen = {(c.domain, c.path, c.name) for c in cookies}
            # Build a real list: filter() is lazy (and always truthy) in
            # Python 3, which made the emptiness check below meaningless.
            ns_cookies = [c for c in ns_cookies
                          if (c.domain, c.path, c.name) not in seen]

        if ns_cookies:
            cookies.extend(ns_cookies)

    return cookies
1650
def set_cookie_if_ok(self, cookie, request):
    """Store cookie in the jar, but only when the policy accepts it.

    The jar's lock is held for the whole operation, and the policy's and
    the jar's _now timestamps are refreshed before the policy is asked.

    """
    with self._cookies_lock:
        self._policy._now = self._now = int(time.time())

        if not self._policy.set_ok(cookie, request):
            return
        self.set_cookie(cookie)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001663
def set_cookie(self, cookie):
    """Store cookie unconditionally (no policy check is made).

    Cookies live in a three-level mapping keyed by domain, then path, then
    name; an existing cookie with the same three keys is replaced.

    """
    by_domain = self._cookies
    with self._cookies_lock:
        by_path = by_domain.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
1676
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Every cookie produced by make_cookies() is offered to the policy's
    set_ok(); accepted cookies are stored with set_cookie().  The jar's
    lock is held while this happens.

    """
    _debug("extract_cookies: %s", response.info())
    with self._cookies_lock:
        for candidate in self.make_cookies(response, request):
            if not self._policy.set_ok(candidate, request):
                continue
            _debug(" setting cookie: %s", candidate)
            self.set_cookie(candidate)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001688
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    With no arguments, every cookie is removed.  With only domain, all
    cookies for that domain are removed.  With domain and path, all cookies
    for that path within the domain are removed.  With domain, path and
    name, just that one cookie is removed.

    Raises ValueError if name is given without both domain and path, or
    path is given without domain.  Raises KeyError if no matching cookie
    exists.

    """
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is not None:
        del self._cookies[domain]
        return

    # no selector at all: start over with an empty store
    self._cookies = {}
1715
def clear_session_cookies(self):
    """Discard all session cookies.

    A session cookie here is one whose discard attribute is true.  Note
    that the .save() method won't save session cookies anyway, unless you
    ask otherwise by passing a true ignore_discard argument.

    """
    with self._cookies_lock:
        for candidate in self:
            if not candidate.discard:
                continue
            self.clear(candidate.domain, candidate.path, candidate.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001730
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    """
    with self._cookies_lock:
        # one timestamp for the whole sweep
        cutoff = time.time()
        for candidate in self:
            if not candidate.is_expired(cutoff):
                continue
            self.clear(candidate.domain, candidate.path, candidate.name)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001749
def __iter__(self):
    """Return deepvalues() of the jar's nested cookie mapping."""
    nested = self._cookies
    return deepvalues(nested)
1752
def __len__(self):
    """Return number of contained cookies."""
    # Counted by iterating over the jar; no separate counter is kept.
    return sum(1 for _ in self)
1758
def __repr__(self):
    """Return "<ClassName[...]>" listing the repr() of every cookie."""
    listing = ", ".join(repr(cookie) for cookie in self)
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001763
def __str__(self):
    """Return "<ClassName[...]>" listing the str() of every cookie."""
    listing = ", ".join(str(cookie) for cookie in self)
    return "<%s[%s]>" % (self.__class__.__name__, listing)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001768
1769
# derives from OSError for backwards-compatibility with Python 2.4.0
class LoadError(OSError):
    """Raised when a cookie file is not in the expected format."""
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001772
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This base class handles file names and state rollback; the actual file
    format is supplied by subclasses through save() and _really_load().

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename may be a str or any os.PathLike object; it is stored as
        returned by os.fspath().  delayload is stored as a bool.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            filename = os.fspath(filename)
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here: always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  The opened file is handed to self._really_load(), which
        does the parsing.  Cookies already in the jar are not removed first.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.  The same
        rollback is applied to any other exception escaping load(), which
        is then re-raised unchanged.

        Raises ValueError if no filename is given and self.filename is unset.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with self._cookies_lock:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except BaseException:
                # Roll back on *any* failure (not just OSError) so a
                # decoding error or interrupt cannot leave the jar emptied
                # or half-loaded.
                self._cookies = old_state
                raise
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001826
Georg Brandl7c9b61b2008-05-26 17:56:51 +00001827
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    The result is a single header value built with join_header_words():
    the name/value pair first, then path and domain, then whichever
    optional attributes are set, then any non-standard attributes from
    cookie._rest (sorted by key), and finally the version.

    The valueless path_spec, port_spec and domain_dot entries record
    Cookie flags; LWPCookieJar.save() describes them as extra information
    on top of the libwww-perl format.

    """
    h = [(cookie.name, cookie.value),
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None: h.append(("port", cookie.port))
    if cookie.path_specified: h.append(("path_spec", None))
    if cookie.port_specified: h.append(("port_spec", None))
    if cookie.domain_initial_dot: h.append(("domain_dot", None))
    if cookie.secure: h.append(("secure", None))
    # Test against None, not truthiness: an expiry of 0 (the epoch) is a
    # real expiry time and must survive a save/load round trip.
    if cookie.expires is not None:
        h.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard: h.append(("discard", None))
    if cookie.comment: h.append(("comment", cookie.comment))
    if cookie.comment_url: h.append(("commenturl", cookie.comment_url))

    for k in sorted(cookie._rest):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
1855
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        When ignore_discard is false, cookies whose discard attribute is
        true are left out; when ignore_expires is false, expired cookies
        are left out.  The result ends with a newline unless it is empty.

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to a file as "Set-Cookie3" lines.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  An existing file is overwritten.  A newly created file is
        given mode 0o600 (subject to the umask), because cookies are
        credentials; an existing file keeps its permissions.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # Open through os.open() so that a new file is not readable by
        # group/other, which plain open(filename, "w") would allow.
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse "Set-Cookie3" lines from open file f into the jar.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Later lines that do not start with "Set-Cookie3:" are
        skipped.  filename is used only in error messages.

        OSError (including LoadError) propagates unchanged; any other
        exception raised while parsing is reported through
        _warn_unhandled_exception() and replaced by LoadError.

        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Pre-bind so the error handler below can always format `line`,
        # even if the very first readline() in the loop is what fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "": break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = dict.fromkeys(boolean_attrs, False)
                    rest = {}
                    for k, v in data[1:]:
                        lc = k.lower() if k is not None else None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            # a bare flag (no value) means "true"
                            if v is None: v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # no usable expiry time: treat as a session cookie
                    if expires is None:
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1974
1975
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Matched against the first line of a file being loaded.
    magic_re = re.compile("#( Netscape)? HTTP Cookie File")
    # Written verbatim at the top of every saved file.
    header = """\
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse a Netscape-format cookies file from open file f.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Each remaining non-blank, non-comment line must hold
        exactly seven tab-separated fields: domain, domain_specified, path,
        secure, expires, name, value.  filename is used only in error
        messages.

        OSError (including LoadError) propagates unchanged; any other
        exception raised while parsing is reported through
        _warn_unhandled_exception() and replaced by LoadError.

        """
        now = time.time()

        magic = f.readline()
        if not self.magic_re.search(magic):
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Pre-bind so the error handler below can always format `line`,
        # even if the very first readline() in the loop is what fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if stripped.startswith(("#", "$")) or stripped == "":
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # NOTE(review): this consistency check is an assert, so it
                # is skipped entirely when Python runs with -O.
                assert domain_specified == initial_dot

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to a file in Netscape cookies.txt format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  An existing file is overwritten.  A newly created file is
        given mode 0o600 (subject to the umask), because cookies are
        credentials; an existing file keeps its permissions.

        Unless ignore_discard is true, cookies whose discard attribute is
        true are not written; unless ignore_expires is true, expired
        cookies are not written.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # Open through os.open() so that a new file is not readable by
        # group/other, which plain open(filename, "w") would allow.
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = "TRUE" if cookie.secure else "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")