blob: 3be327dd0063381f363f663ab28f736666d794ff [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as a file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
18urllib. Pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020021OSError); for HTTP errors, raises an HTTPError, which can also be
Jeremy Hylton1afc1692008-06-18 20:49:58 +000022treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
27instantiate. If one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
71# If an authentication error handler tries to perform
72# authentication for some reason but fails, how should the error be
73# signalled? The client needs to know the HTTP error code. But if
74# the handler knows that the problem was, e.g., that it didn't know
75# the hash algo that was requested in the challenge, it would be good to
76# pass that information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
Martin Pantere6f06092016-05-16 01:14:20 +000094import string
Jeremy Hylton1afc1692008-06-18 20:49:58 +000095import sys
96import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000097import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070098import tempfile
99import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700100import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700101
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000102
Georg Brandl13e89462008-07-01 19:56:00 +0000103from urllib.error import URLError, HTTPError, ContentTooShortError
104from urllib.parse import (
105 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
106 splittype, splithost, splitport, splituser, splitpasswd,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100107 splitattr, splitquery, splitvalue, splittag, to_bytes,
108 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000109from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000110
111# check for SSL
112try:
113 import ssl
Brett Cannoncd171c82013-07-04 17:43:24 -0400114except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000115 _have_ssl = False
116else:
117 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000118
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800119__all__ = [
120 # Classes
121 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
122 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
123 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
R David Murray4c7f9952015-04-16 16:36:18 -0400124 'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
125 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
126 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
127 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800128 'UnknownHandler', 'HTTPErrorProcessor',
129 # Functions
130 'urlopen', 'install_opener', 'build_opener',
131 'pathname2url', 'url2pathname', 'getproxies',
132 # Legacy interface
133 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
134]
135
# used in User-Agent header sent
# Built from sys.version_info rather than by slicing sys.version: the slice
# sys.version[:3] truncates two-digit minor versions ("3.10" -> "3.1").
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener; created lazily by urlopen() and replaced by
# install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* and return whatever the selected opener's open() returns.

    url may be a string or a Request object; data and timeout are passed
    straight through to the opener.

    Opener selection:
      * If any of cafile, capath or cadefault is true, a default SSL
        context for server authentication is created from cafile/capath
        and a one-off opener using an HTTPSHandler with that context is
        built.  cadefault only selects this branch; its value is not
        forwarded to the context factory.
      * Otherwise, if context is true, a one-off opener using an
        HTTPSHandler with that context is built.
      * Otherwise the module-wide opener is used, and is created with
        build_opener() on first use.

    Raises ValueError if context is given together with any of the ca*
    arguments, or if the ca* arguments are used while the ssl module is
    unavailable.
    """
    global _opener
    wants_custom_ca = cafile or capath or cadefault
    if wants_custom_ca:
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        opener = build_opener(HTTPSHandler(context=context))
    elif context:
        opener = build_opener(HTTPSHandler(context=context))
    else:
        # Fall back to the shared opener, building it the first time through.
        if _opener is None:
            _opener = build_opener()
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000164
def install_opener(opener):
    """Make *opener* the module-wide default opener.

    The object is stored as-is in the module-level _opener variable; no
    validation is performed here.
    """
    global _opener
    _opener = opener
168
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Copy the resource at *url* to a file on disk.

    url is opened with urlopen(url, data).  If filename is given the
    content is written there; otherwise a named temporary file is created
    (and remembered so that urlcleanup() can delete it later).

    For a "file" URL with no filename, nothing is copied: the normalised
    local path and the headers are returned directly.

    reporthook, if given, is called as reporthook(blocknum, blocksize,
    totalsize) once before the first read and once after every block
    written.  totalsize is -1 when the response carries no Content-Length
    header.

    Returns a (filename, headers) tuple.  Raises ContentTooShortError when
    a Content-Length was announced and fewer bytes than that were read.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # A local file with no explicit destination needs no copy at all.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: caller-supplied path or a fresh temp file.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            else:
                size = -1
            read = 0
            blocknum = 0

            if reporthook:
                reporthook(blocknum, bs, size)

            block = fp.read(bs)
            while block:
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
                block = fp.read(bs)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000232
def urlcleanup():
    """Clean up temporary files from urlretrieve calls.

    Every path recorded in _url_tempfiles is unlinked (OSError from an
    individual unlink is ignored), the record is emptied, and the
    module-wide opener is reset to None if one is currently set.
    """
    global _opener
    for leftover in _url_tempfiles:
        # Best effort: a file that is already gone is not an error.
        with contextlib.suppress(OSError):
            os.unlink(leftover)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
245
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is the network-location part of request.full_url; when that
    is empty, the request's "Host" header is used instead (defaulting to
    the empty string).  A trailing ":port" is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url).netloc
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    without_port = _cut_port_re.sub("", host, 1)
    return without_port.lower()
263
class Request:
    """State of a single URL request: URL parts, body data and headers.

    Headers are kept in two dicts: ``headers`` and ``unredirected_hdrs``.
    Keys are stored in str.capitalize() form by the add_* methods; the
    lookup and removal methods use the name exactly as given.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Initialise the request.

        url is parsed immediately (ValueError if it has no scheme).
        headers is a mapping whose items are added via add_header().
        origin_req_host defaults to request_host(self).  method, when
        true, is stored as the instance attribute ``method`` and then
        overrides the default chosen by get_method().
        """
        self.full_url = url
        # Both header tables must exist before ``data`` is assigned, since
        # the data setter inspects them.
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        self.origin_req_host = (request_host(self)
                                if origin_req_host is None
                                else origin_req_host)
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL, with "#fragment" re-attached when a fragment is set."""
        if not self.fragment:
            return self._full_url
        return '{}#{}'.format(self._full_url, self.fragment)

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        bare_url, self.fragment = splittag(unwrap(url))
        self._full_url = bare_url
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None."""
        return self._data

    @data.setter
    def data(self, data):
        changed = data != self._data
        if not changed:
            return
        self._data = data
        # issue 16464
        # if we change data we need to remove content-length header
        # (cause it's most probably calculated for previous value)
        if self.has_header("Content-length"):
            self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split the stored URL into type, host and selector."""
        self.type, remainder = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(remainder)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        # An explicit ``method`` attribute wins; otherwise the presence of
        # body data decides between POST and GET.
        fallback = "GET" if self.data is None else "POST"
        return getattr(self, 'method', fallback)

    def get_full_url(self):
        """Return the full_url property."""
        return self.full_url

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host*.

        For an https request with no tunnel host recorded yet, the
        current host is remembered as the tunnel host and type/selector
        are left alone.  Otherwise the request type becomes *type* and
        the selector becomes the full URL.  In both cases the host is
        replaced by *host*.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if header_name is a key of either header table."""
        return any(header_name in table
                   for table in (self.headers, self.unredirected_hdrs))

    def get_header(self, header_name, default=None):
        """Look header_name up in headers, then unredirected_hdrs."""
        if header_name in self.headers:
            return self.headers[header_name]
        return self.unredirected_hdrs.get(header_name, default)

    def remove_header(self, header_name):
        """Drop header_name from both header tables if present."""
        for table in (self.headers, self.unredirected_hdrs):
            table.pop(header_name, None)

    def header_items(self):
        """Return a list of (name, value) pairs from both header tables.

        On a name clash the entry from ``headers`` wins.
        """
        return list({**self.unredirected_hdrs, **self.headers}.items())
373
class OpenerDirector:
    """Dispatch requests to registered handlers by method-name convention.

    Handlers are indexed by inspecting their attribute names:
    ``<proto>_open``, ``<proto>_request``, ``<proto>_response`` and
    ``<proto>_error_<kind>`` each register the handler in the matching
    lookup table.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* in every table its method names match.

        Raises TypeError if the object has no ``add_parent`` attribute.
        A handler that matches nothing is silently ignored; otherwise it
        is also inserted into ``self.handlers`` and told about its parent.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        # Conditions whose table is keyed directly by protocol name.
        by_protocol = {
            "open": self.handle_open,
            "response": self.process_response,
            "request": self.process_request,
        }

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # Everything after "<proto>_error_" is the error kind;
                # numeric kinds (HTTP status codes) become ints.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition in by_protocol:
                kind = protocol
                lookup = by_protocol[condition]
            else:
                continue

            # insort on an empty list is a plain append, so one call
            # covers both the first and subsequent registrations.
            bisect.insort(lookup.setdefault(kind, []), handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a string or a request object) and return the response.

        The request passes through the ``<proto>_request`` processors,
        then the open chain, then the ``<proto>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        request_hook = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains in turn."""
        opened = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if not opened:
            protocol = req.type
            opened = self._call_chain(self.handle_open, protocol,
                                      protocol + '_open', req)
        if not opened:
            opened = self._call_chain(self.handle_open, 'unknown',
                                      'unknown_open', req)
        return opened

    def error(self, proto, *args):
        """Dispatch an error to the matching ``*_error*`` handler chain.

        For 'http' and 'https' the third positional argument selects the
        ``http_error_<code>`` chain, with ``http_error_default`` as the
        fallback.  For any other proto the ``<proto>_error`` chain is used.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            kind = args[2]  # YUCK!
            meth_name = 'http_error_%s' % kind
        else:
            table = self.handle_error
            kind = proto
            meth_name = proto + '_error'

        result = self._call_chain(table, kind, meth_name, *args)
        if result:
            return result

        if is_http:
            return self._call_chain(table, 'default', 'http_error_default',
                                    *args)
510 return self._call_chain(*args)
511
512# XXX probably also want an abstract factory that knows when it makes
513# sense to skip a superclass in favor of a subclass and when it might
514# make sense to include both
515
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    Each argument may be a handler instance or a handler class (classes
    are instantiated with no arguments).

    The opener starts from a set of default handler classes (HTTPSHandler
    is among them only when http.client provides HTTPSConnection).  A
    default class is left out when any argument is an instance or a
    subclass of it.  Defaults are added first, then the arguments in the
    order given.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def is_overridden(default_cls):
        # True when a supplied class derives from, or a supplied instance
        # is an instance of, this default class.
        for supplied in handlers:
            if isinstance(supplied, type):
                if issubclass(supplied, default_cls):
                    return True
            elif isinstance(supplied, default_cls):
                return True
        return False

    for default_cls in default_classes:
        if not is_overridden(default_cls):
            opener.add_handler(default_cls())

    for supplied in handlers:
        instance = supplied() if isinstance(supplied, type) else supplied
        opener.add_handler(instance)
    return opener
550 return opener
551
class BaseHandler:
    """Common base for handlers: parent bookkeeping and ordering."""

    # Sort key compared by __lt__; lower values order first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* on this handler as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
569
570
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes, else the parent's error() result."""
        code = response.code
        msg = response.msg
        hdrs = response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
587
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler that raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request's full URL and the response details."""
        failing_url = req.full_url
        raise HTTPError(failing_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000591
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.

        The new Request carries the old request's headers minus
        content-length/content-type, the same origin_req_host, and is
        marked unverifiable.
        """
        method = req.get_method()
        safe_follow = (method in ("GET", "HEAD")
                       and code in (301, 302, 303, 307))
        post_follow = method == "POST" and code in (301, 302, 303)
        if not (safe_follow or post_follow):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {name: value for name, value in req.headers.items()
                      if name.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError when the target
        scheme is not http, https, ftp or empty, or when the loop limits
        are exceeded.  Otherwise the response body is drained and closed
        and the new request is opened through the parent.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for field in ("location", "uri"):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.
        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = urlparts._replace(path="/")
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        already_tracking = hasattr(req, 'redirect_dict')
        if not already_tracking:
            req.redirect_dict = {}
        visited = new.redirect_dict = req.redirect_dict
        if already_tracking and (
                visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
            raise HTTPError(req.full_url, code,
                            self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
703
704
705def _parse_proxy(proxy):
706 """Return (scheme, user, password, host/port) given a URL or an authority.
707
708 If a URL is supplied, it must have an authority (host:port) component.
709 According to RFC 3986, having an authority component means the URL must
Senthil Kumarand8e24f12014-04-14 16:32:20 -0400710 have two slashes after the scheme.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000711 """
Georg Brandl13e89462008-07-01 19:56:00 +0000712 scheme, r_scheme = splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000713 if not r_scheme.startswith("/"):
714 # authority
715 scheme = None
716 authority = proxy
717 else:
718 # URL
719 if not r_scheme.startswith("//"):
720 raise ValueError("proxy URL with no authority: %r" % proxy)
721 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
722 # and 3.3.), path is empty or starts with '/'
723 end = r_scheme.find("/", 2)
724 if end == -1:
725 end = None
726 authority = r_scheme[2:end]
Georg Brandl13e89462008-07-01 19:56:00 +0000727 userinfo, hostport = splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000728 if userinfo is not None:
Georg Brandl13e89462008-07-01 19:56:00 +0000729 user, password = splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000730 else:
731 user = password = None
732 return scheme, user, password, hostport
733
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per URL scheme.

    For every ``scheme -> proxy URL`` entry an instance attribute
    ``<scheme>_open`` is created that forwards to proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Set up one ``<scheme>_open`` hook per entry of *proxies*.

        proxies defaults to the result of getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Defaults bind the current loop values into each lambda.
            hook = (lambda r, proxy=url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, hook)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, or return None to leave it to other handlers.

        Returns None without touching the request when the request host
        is bypassed.  Otherwise a Basic Proxy-authorization header is
        added when the proxy URL carries both user and password, and the
        request is redirected to the proxy.  If the proxy scheme differs
        from the original one (and the original is not https) the request
        is re-opened through the parent; otherwise None is returned.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            encoded = base64.b64encode(user_pass.encode())
            req.add_header('Proxy-authorization',
                           'Basic ' + encoded.decode("ascii"))
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000775
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and by URI prefix.

    ``self.passwd`` maps realm -> {tuple of reduced URIs -> (user, passwd)},
    where a reduced URI is the ``(authority, path)`` pair produced by
    reduce_uri().
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at *uri* (one URI or a sequence).

        Each URI is stored twice: once with the scheme's default port made
        explicit and once as given, so later lookups match either spelling.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        Returns ``(None, None)`` when nothing registered covers the URI.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a character-wise common prefix would
        # treat "/foobar" as being below "/foo" and hand credentials to a
        # sibling resource.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
838
839
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` (default) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; on a miss, retry under realm ``None``."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this specific realm.
            return lookup(self, None, authuri)
        return user, password
848
849
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also remembers which URIs are authenticated.

    ``self.authenticated`` maps a reduced URI to the flag last recorded
    for it through add_password() or update_authenticated().
    """

    def __init__(self, *args, **kwargs):
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials and record the authenticated flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        realms = (realm,) if realm is None else (None, realm)
        for target_realm in realms:
            super().add_password(target_realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Record *is_authenticated* for *uri* (one URI or a sequence)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]

        for default_port in True, False:
            for single_uri in uri:
                key = self.reduce_uri(single_uri, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag of the first recorded URI covering *authuri*.

        Returns None when no recorded URI covers it.
        """
        for default_port in True, False:
            candidate = self.reduce_uri(authuri, default_port)
            for known_uri, flag in self.authenticated.items():
                if self.is_suburi(known_uri, candidate):
                    return flag
        return None
879
880
class AbstractBasicAuthHandler:
    """Shared implementation of HTTP Basic authentication.

    Concrete handlers mix this in and supply ``auth_header`` (the request
    header that carries the credentials) and ``parent`` (the opener used
    to re-issue the request).
    """

    # XXX this allows for multiple auth-schemes, but only the first
    # challenge that carries a realm is honoured.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    # The pattern is anchored at the start of the header or at a comma and
    # the scheme may not contain commas; the previous leading "(?:.*,)*"
    # backtracked exponentially on headers containing many commas.
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in header *authreq*, if it is Basic.

        Returns the response of the retried request, or None when there is
        no usable challenge or no credentials.  Raises ValueError when the
        header starts with a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if not authreq:
            return None

        tokens = authreq.split()
        if not tokens:
            # Header consisting only of whitespace: nothing to answer.
            return None
        scheme = tokens[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        mo = AbstractBasicAuthHandler.rx.search(authreq)
        if mo:
            scheme, quote, realm = mo.groups()
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 2)
            if scheme.lower() == 'basic':
                return self.retry_http_basic_auth(host, req, realm)
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with Basic credentials for *realm* at *host*.

        Returns None when no password is known, or when the same
        credentials were already sent (retrying would loop).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
        if req.get_header(self.auth_header, None) == auth:
            return None
        req.add_unredirected_header(self.auth_header, auth)
        return self.parent.open(req, timeout=req.timeout)

    def http_request(self, req):
        """Pre-emptively attach credentials to already-authenticated URLs.

        Only active when the password manager offers ``is_authenticated``.
        """
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            if passwd is None:
                # No stored credentials: do not send a bogus "None:None".
                return req
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether the URL answered with a 2xx status."""
        if hasattr(self.passwd, 'is_authenticated'):
            succeeded = 200 <= response.code < 300
            self.passwd.update_authenticated(req.full_url, succeeded)
        return response

    https_request = http_request
    https_response = http_response
958
959
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000960
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from origin servers with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Delegate the 'www-authenticate' challenge, keyed by the full URL."""
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000970
971
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from proxies with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Delegate the 'proxy-authenticate' challenge, keyed by req.host."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000985
986
# Return n random bytes.  Module-level alias for os.urandom;
# AbstractDigestAuthHandler.get_cnonce() draws its random salt through
# this name rather than calling os.urandom directly.
_randombytes = os.urandom
989
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000990
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication.

    Concrete handlers mix this in and supply ``auth_header`` (the request
    header that carries the credentials) and ``parent`` (the opener used
    to re-issue the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge in header *auth_header*, if it is Digest.

        Returns the retried response, or None when there is nothing to
        answer (no header, or a Basic challenge left to another handler).
        Raises HTTPError after more than five retries and ValueError for
        a scheme that is neither Digest nor Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            tokens = authreq.split()
            if not tokens:
                # Header consisting only of whitespace: nothing to answer.
                return None
            scheme = tokens[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)
        return None

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with a Digest answer to the challenge *auth*.

        Returns None when no answer can be built or when the identical
        answer was already sent.
        """
        # A bare "Digest" token without parameters yields an empty
        # challenge instead of an unpacking error.
        pieces = auth.split(None, 1)
        challenge = pieces[1] if len(pieces) > 1 else ''
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp
        return None

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for *req* from *chal*.

        *chal* is the parsed challenge (a mapping of parameter names to
        values).  Returns None when realm or nonce is missing or no user
        is known for the realm.  Raises URLError when the server offers
        no ``qop`` option this client supports.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # qop is a comma-separated list of the options the server
            # accepts (e.g. "auth,auth-int"); pick plain "auth" from it.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the ``(H, KD)`` hash helpers for *algorithm*.

        Raises ValueError for anything other than 'MD5' or 'SHA'.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1133
1134
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 'www-authenticate' challenge, then reset the retry count."""
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        # Only reached when the attempt above did not raise.
        self.reset_retry_count()
        return response
1151
1152
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from proxies with Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 'proxy-authenticate' challenge, then reset the retry count."""
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        # Only reached when the attempt above did not raise.
        self.reset_retry_count()
        return response
1164
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and connection logic for HTTP(S)."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers every outgoing HTTP request needs.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the parent opener's default headers, each only
        when not already present.  Returns *request*.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was supplied.
        """
        # Imported locally: the ABC aliases on the bare ``collections``
        # module were removed in Python 3.10.
        from collections.abc import Iterable

        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # Not a buffer: the size is unknown up front.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL; the Host header
            # must name the origin server found inside it.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Raises URLError when the request has no host or sending fails
        with an OSError.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over regular ones.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Release the connection on any failure, then propagate.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001277
1278
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs over http.client.HTTPConnection."""

    # Outgoing requests get the shared header preparation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1285
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs over http.client.HTTPSConnection."""

        # Outgoing requests get the shared header preparation.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            # Both settings are forwarded to the connection constructor.
            tls_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **tls_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001302
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a fresh http.cookiejar.CookieJar if None."""
        import http.cookiejar
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = http.cookiejar.CookieJar()

    def http_request(self, request):
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed the same way.
    https_request = http_request
    https_response = http_response
1320
class UnknownHandler(BaseHandler):
    """Reject URLs whose scheme has no handler."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001325
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value enclosed in double quotes has the quotes removed.  An empty
    value (``key=``) is kept as the empty string.  Raises ValueError for
    an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexing, so an empty value does not raise.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1335
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    elements = []
    current = []
    escaped = False
    in_quotes = False

    for ch in s:
        if escaped:
            # Character after a backslash is taken literally.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                # The backslash itself is dropped from the output.
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
1378
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* locally unless it names a foreign host."""
        selector = req.selector
        names_other_host = (selector[:2] == '//' and selector[2:3] != '/'
                            and req.host and req.host != 'localhost')
        if not names_other_host:
            return self.open_local_file(req)
        if req.host not in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")
        return None

    # names for the localhost
    names = None

    def get_names(self):
        """Return the addresses of this machine, resolved once and cached."""
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            local_addrs = socket.gethostbyname_ex('localhost')[2]
            own_addrs = socket.gethostbyname_ex(socket.gethostname())[2]
            FileHandler.names = tuple(local_addrs + own_addrs)
        except socket.gaierror:
            FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        Raises URLError when the file cannot be accessed or when the
        request's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            header_text = (
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mimetypes.guess_type(filename)[0] or 'text/plain',
                 stats.st_size,
                 email.utils.formatdate(stats.st_mtime, usegmt=True)))
            headers = email.message_from_string(header_text)
            if host:
                host, port = splitport(host)
            on_this_machine = not host or (
                not port and _safe_gethostbyname(host) in self.get_names())
            if on_this_machine:
                prefix = 'file://' + host if host else 'file://'
                return addinfourl(open(localfile, 'rb'), headers,
                                  prefix + filename)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001430
1431def _safe_gethostbyname(host):
1432 try:
1433 return socket.gethostbyname(host)
1434 except socket.gaierror:
1435 return None
1436
class FTPHandler(BaseHandler):
    """Open ftp: URLs through a fresh FTP connection per request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl wrapping the data connection.  Raises
        URLError when no host is given, the host cannot be resolved, or
        the FTP exchange fails.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs = segments[:-1]
        filename = segments[-1]
        if dirs and not dirs[0]:
            # Drop the empty segment produced by the leading '/'.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise,
            # unless a ;type= attribute says differently.
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if (attr.lower() == 'type'
                        and value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(filename, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001494
class CacheFTPHandler(FTPHandler):
    """FTP handler that keeps ftpwrapper connections around for reuse.

    Connections are cached under a (user, host, port, path, timeout) key.
    An entry expires ``delay`` seconds after it was last requested, and one
    entry is evicted whenever the cache size reaches ``max_conns``.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> cached connection
        self.timeout = {}    # key -> absolute expiry time (time.time() based)
        self.soonest = 0     # smallest expiry time currently in self.timeout
        self.delay = 60      # seconds added to "now" on every request
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds an entry lives after its last use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which check_cache() evicts an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for these parameters, creating it
        first if necessary, and refresh its expiry time."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def _discard(self, key):
        """Close the cached connection stored under *key* and forget it."""
        self.cache[key].close()
        del self.cache[key]
        del self.timeout[key]

    def check_cache(self):
        """Drop expired entries, then evict one entry if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self._discard(k)
        # default=0 mirrors the initial value set in __init__ and keeps an
        # empty cache from raising ValueError.
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection, exactly as the expiry
                    # branch above and clear_cache() do; merely dropping
                    # the reference leaked it.
                    self._discard(k)
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1547
class DataHandler(BaseHandler):
    """Handler for "data:" URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the payload embedded in req.full_url and return it as a
        response object.  POSTed data is ignored.

        syntax:
          dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
          mediatype := [ type "/" subtype ] *( ";" parameter )
          data      := *urlchar
          parameter := attribute "=" value
        """
        full_url = req.full_url

        _, remainder = full_url.split(":", 1)
        mediatype, encoded = remainder.split(",", 1)

        # Percent-decoding is applied unconditionally because base64
        # payloads may be quoted as well.
        payload = unquote_to_bytes(encoded)
        base64_marker = ";base64"
        if mediatype.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(base64_marker)]

        mediatype = mediatype or "text/plain;charset=US-ASCII"

        header_text = ("Content-type: %s\nContent-length: %d\n" %
                       (mediatype, len(payload)))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, full_url)
1577
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001578
# Code moved from the old urllib module
1580
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path by
        percent-decoding it; OS-specific, not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL by
        percent-encoding it; OS-specific, not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001596
1597# This really consists of two pieces:
1598# (1) a class which handles opening of all sorts of URLs
1599# (plus assorted utilities etc.)
1600# (2) a set of functions for parsing URLs
1601# XXX Should these be separated out into different modules?
1602
1603
# Default FTP connection cache; __init__ stores this same dict on every
# URLopener as self.ftpcache.
ftpcache = {}
class URLopener:
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level defaults: cleanup() is reachable through __del__, which
    # also runs for an instance whose __init__ raised before assigning
    # these attributes.
    __tempfiles = None
    tempcache = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Create an opener (deprecated interface).

        proxies -- mapping from URL scheme to proxy URL; getproxies() is
                   consulted when it is None.
        x509    -- optional 'key_file' and 'cert_file' entries, used for
                   HTTPS connections.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release the resources held by this opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files made by retrieve() and clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the open_<scheme>() method for the URL's scheme
        ('file' when the URL has none).  When self.proxies has an entry for
        the scheme, the proxy's scheme is used instead and the method
        receives a (proxy host, full URL) pair.
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise OSError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL through a proxy whose
        scheme has no open_<scheme>() method."""
        urltype, url = splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, when given, is called as reporthook(blocknum, blocksize,
        totalsize) once before the first read and once after every block;
        totalsize is -1 when there is no Content-Length header.  Raises
        ContentTooShortError when fewer bytes arrive than Content-Length
        announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except OSError:
                # Not usable as a local file; fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line; no response
            # object will ever own the connection, so close it here.
            http_conn.close()
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using this opener's
            key_file and cert_file."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Raises URLError when the file cannot be stat()ed or when the URL
        names a host that is not this machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by (user, host, port,
        directory path); the cache is pruned once it holds more than
        MAXFTPCACHE entries.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Without a file name the request is a directory listing.
            if not file: ftptype = 'D'
            else: ftptype = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    ftptype = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, ftptype)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        The returned file object yields the generated header lines, a blank
        line and then the decoded payload, all as text.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without "=" is the encoding, not a parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002049
2050
2051class FancyURLopener(URLopener):
2052 """Derived class with handlers for errors we can handle (perhaps)."""
2053
def __init__(self, *args, **kwargs):
    """Initialise the base opener, then the Basic-auth cache and the
    redirect counter with its limit."""
    URLopener.__init__(self, *args, **kwargs)
    self.auth_cache = {}
    self.tries, self.maxtries = 0, 10
2058 self.maxtries = 10
2059
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- hand the error response back to the
    caller as an addinfourl instead of raising an exception."""
    response_url = "http:" + url
    return addinfourl(fp, headers, response_url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002063
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    Follows the redirect through redirect_internal().  Once self.tries
    reaches self.maxtries the redirect is reported as a 500 error instead.
    The counter is reset to zero whenever this call finishes.
    """
    self.tries += 1
    try:
        limit_reached = self.maxtries and self.tries >= self.maxtries
        if not limit_reached:
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        # Too many redirects: report through the 500 handler if the opener
        # has one, otherwise through the default handler.
        try:
            handler = self.http_error_500
        except AttributeError:
            handler = self.http_error_default
        return handler(url, fp, 500,
                       "Internal Server Error: Redirect Recursion",
                       headers)
    finally:
        self.tries = 0
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002081
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Open the redirect target named by the 'location' (or 'uri') header.

    Returns None when neither header is present.  Raises HTTPError when
    the target's scheme is not http, https, ftp or empty.
    """
    for field in ('location', 'uri'):
        if field in headers:
            target = headers[field]
            break
    else:
        return
    fp.close()

    # In case the server sent a relative URL, join with original:
    target = urljoin(self.type + ":" + url, target)

    # For security reasons, we don't allow redirection to anything other
    # than http, https and ftp.

    # We are using newer HTTPError with older redirect_internal method
    # This older method will get deprecated in 3.3

    scheme = urlparse(target).scheme
    if scheme not in ('http', 'https', 'ftp', ''):
        explanation = errmsg + " Redirection to url '%s' is not allowed." % target
        raise HTTPError(target, errcode, explanation, headers, fp)

    return self.open(target)
2109
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- also relocated (permanently).

    Handled exactly like a 302.
    """
    outcome = self.http_error_302(url, fp, errcode, errmsg, headers, data)
    return outcome
2113
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- also relocated (essentially identical to 302).

    Delegates to the 302 handler with the arguments unchanged.
    """
    outcome = self.http_error_302(url, fp, errcode, errmsg, headers, data)
    return outcome
2117
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 307 -- relocated, but turn POST into error.

    A request that carried data is passed to http_error_default();
    a request without data is followed like a 302.
    """
    if data is not None:
        return self.http_error_default(url, fp, errcode, errmsg, headers)
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2124
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
        retry=False):
    """Error 401 -- authentication required.
    This function supports Basic authentication only.

    Every case that cannot be handled (no usable challenge, a scheme other
    than Basic, or retry being false) is passed to
    URLopener.http_error_default().  Otherwise the request goes to the
    retry_<type>_basic_auth method matching self.type.
    """
    def give_up():
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)

    if 'www-authenticate' not in headers:
        give_up()
    challenge = headers['www-authenticate']
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not match:
        give_up()
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        give_up()
    if not retry:
        give_up()
    retry_method = getattr(self, 'retry_' + self.type + '_basic_auth')
    if data is None:
        return retry_method(url, realm)
    return retry_method(url, realm, data)
2149
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
        retry=False):
    """Error 407 -- proxy authentication required.
    This function supports Basic authentication only.

    Every case that cannot be handled (no usable challenge, a scheme other
    than Basic, or retry being false) is passed to
    URLopener.http_error_default().  Otherwise the request goes to the
    retry_proxy_<type>_basic_auth method matching self.type.
    """
    def give_up():
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)

    if 'proxy-authenticate' not in headers:
        give_up()
    challenge = headers['proxy-authenticate']
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not match:
        give_up()
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        give_up()
    if not retry:
        give_up()
    retry_method = getattr(self, 'retry_proxy_' + self.type + '_basic_auth')
    if data is None:
        return retry_method(url, realm)
    return retry_method(url, realm, data)
2174
def retry_proxy_http_basic_auth(self, url, realm, data=None):
    """Retry an HTTP request with credentials added to the 'http' proxy.

    The credentials come from get_user_passwd() and are written into
    self.proxies['http'].  Returns None when both user and password are
    empty.
    """
    host, selector = splithost(url)
    target = 'http://' + host + selector
    _, proxy_rest = splittype(self.proxies['http'])
    proxy_host, proxy_selector = splithost(proxy_rest)
    # Drop any user info already in the proxy host; a non-zero offset is
    # also what get_user_passwd() receives as its clear_cache argument.
    offset = proxy_host.find('@') + 1
    proxy_host = proxy_host[offset:]
    user, passwd = self.get_user_passwd(proxy_host, realm, offset)
    if not (user or passwd):
        return None
    proxy_host = "%s:%s@%s" % (quote(user, safe=''),
                               quote(passwd, safe=''), proxy_host)
    self.proxies['http'] = 'http://' + proxy_host + proxy_selector
    if data is None:
        return self.open(target)
    return self.open(target, data)
2192
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    """Retry an HTTPS request with credentials added to the 'https' proxy.

    The credentials come from get_user_passwd() and are written into
    self.proxies['https'].  Returns None when both user and password are
    empty.
    """
    host, selector = splithost(url)
    target = 'https://' + host + selector
    _, proxy_rest = splittype(self.proxies['https'])
    proxy_host, proxy_selector = splithost(proxy_rest)
    # Drop any user info already in the proxy host; a non-zero offset is
    # also what get_user_passwd() receives as its clear_cache argument.
    offset = proxy_host.find('@') + 1
    proxy_host = proxy_host[offset:]
    user, passwd = self.get_user_passwd(proxy_host, realm, offset)
    if not (user or passwd):
        return None
    proxy_host = "%s:%s@%s" % (quote(user, safe=''),
                               quote(passwd, safe=''), proxy_host)
    self.proxies['https'] = 'https://' + proxy_host + proxy_selector
    if data is None:
        return self.open(target)
    return self.open(target, data)
2210
def retry_http_basic_auth(self, url, realm, data=None):
    """Retry an HTTP request with basic-auth credentials in the URL.

    url is the scheme-less remainder ('//host/path') of the request.
    Any "user:pass@" prefix already present on the host is dropped, new
    credentials are requested via get_user_passwd() and, percent-quoted,
    embedded into the URL that is opened again.  Returns None when
    neither a user nor a password is available.
    """
    netloc, path = splithost(url)
    # Index just past an embedded "user:pass@" prefix, 0 if there is none.
    # It doubles as the clear_cache flag of get_user_passwd().
    cred_end = netloc.find('@') + 1
    netloc = netloc[cred_end:]
    user, passwd = self.get_user_passwd(netloc, realm, cred_end)
    if not user and not passwd:
        return None
    authed_netloc = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), netloc)
    retry_url = 'http://' + authed_netloc + path
    open_args = (retry_url,) if data is None else (retry_url, data)
    return self.open(*open_args)
2224
def retry_https_basic_auth(self, url, realm, data=None):
    """Retry an HTTPS request with basic-auth credentials in the URL.

    url is the scheme-less remainder ('//host/path') of the request.
    Any "user:pass@" prefix already present on the host is dropped, new
    credentials are requested via get_user_passwd() and, percent-quoted,
    embedded into the URL that is opened again.  Returns None when
    neither a user nor a password is available.
    """
    netloc, path = splithost(url)
    # Index just past an embedded "user:pass@" prefix, 0 if there is none.
    # It doubles as the clear_cache flag of get_user_passwd().
    cred_end = netloc.find('@') + 1
    netloc = netloc[cred_end:]
    user, passwd = self.get_user_passwd(netloc, realm, cred_end)
    if not user and not passwd:
        return None
    authed_netloc = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), netloc)
    retry_url = 'https://' + authed_netloc + path
    open_args = (retry_url,) if data is None else (retry_url, data)
    return self.open(*open_args)
2238
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return a (user, passwd) pair for realm at host.

    Results are cached in self.auth_cache under "realm@host" with the
    host lower-cased.  A true clear_cache discards any cached entry and
    forces a new call to prompt_user_passwd().  A pair in which both
    values are false is returned but not cached.
    """
    cache = self.auth_cache
    cache_key = realm + '@' + host.lower()
    if clear_cache:
        cache.pop(cache_key, None)
    elif cache_key in cache:
        return cache[cache_key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        cache[cache_key] = (user, passwd)
    return user, passwd
2249
def prompt_user_passwd(self, host, realm):
    """Ask for a user name and password on the console.

    Override this in a GUI environment!  Returns (user, passwd), or
    (None, None) if the prompt is interrupted with KeyboardInterrupt.
    """
    import getpass
    try:
        username = input("Enter username for %s at %s: " % (realm, host))
        password = getpass.getpass("Enter password for %s in %s at %s: " %
                                   (username, realm, host))
    except KeyboardInterrupt:
        # Finish the interrupted prompt line before giving up.
        print()
        return None, None
    else:
        return username, password
2261
2262
2263# Utility functions
2264
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; the result is kept in the module-level
    _localhost and returned on later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2272
_thishost = None
def thishost():
    """Return the IP addresses of the current host as a tuple.

    The addresses of socket.gethostname() are looked up once and kept in
    the module-level _thishost.  If that lookup raises socket.gaierror,
    the addresses of 'localhost' are used instead.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2283
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; its all_errors value is kept
    in the module-level _ftperrors afterwards.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2292
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The object is created once and shared: every call returns the same
    instance held in the module-level _noheaders.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2300
2301
2302# Utility classes
2303
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    One instance owns one ftplib.FTP control connection.  refcount
    counts the file objects handed out by retrfile() that have not been
    closed yet; keepalive says whether the connection should stay open
    once that count drops to zero.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Release whatever was opened before the failure, then let
            # the original exception propagate unchanged.
            self.close()
            raise

    def init(self):
        """Connect, log in and change to the directory given by self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start a transfer and return (file object, retrieval length).

        type 'd' or 'D' requests a directory listing; any other value is
        sent as the argument of the FTP TYPE command before retrieving
        file.  If RETR is refused with a 550 reply, or no file name is
        given, a LIST is issued instead.  Other permanent errors are
        raised as URLError with the ftplib exception as its cause.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = True
        else:
            cmd = 'TYPE ' + type
            isdir = False
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Presumably the cached control connection went stale:
            # reconnect once and repeat the command.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always go back to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as not busy."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook of a file object returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring the errors in ftperrors()."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2397
2398# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    # Lowercase variables are preferred, so the environment is read
    # twice.  This first pass takes any spelling of the name and ignores
    # empty values.
    proxies = {
        name.lower()[:-6]: value
        for name, value in os.environ.items()
        if value and name.lower().endswith('_proxy')
    }
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # A variable with a lowercase "_proxy" suffix is still honoured by the
    # second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: only names whose suffix is the lowercase "_proxy".
    # They override the first pass; an empty value removes the scheme.
    for name, value in os.environ.items():
        if not name.endswith('_proxy'):
            continue
        scheme = name.lower()[:-6]
        if value:
            proxies[scheme] = value
        else:
            proxies.pop(scheme, None)
    return proxies
2429
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.
    A suffix may be written with leading dots ('.example.com'); they
    are ignored, so the entry matches 'example.com' and its subdomains.

    host may carry a ':port' suffix.  proxies defaults to the result of
    getproxies_environment().  Returns 1 to bypass the proxy, else 0.

    """
    if proxies is None:
        proxies = getproxies_environment()
    # don't bypass, if no_proxy isn't specified
    try:
        no_proxy = proxies['no']
    except KeyError:
        return 0
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # Ignore leading dots: with them the pattern below would demand
        # two consecutive dots and never match a real host name.
        name = name.strip().lstrip('.')
        if not name:
            # Empty entry (stray comma or only dots): nothing to match.
            continue
        pattern = r'(.+\.)?%s$' % re.escape(name)
        # Entries may or may not include a port, so try both forms.
        if (re.match(pattern, hostonly, re.I)
                or re.match(pattern, host, re.I)):
            return 1
    # otherwise, don't bypass
    return 0
2460
2461
Ronald Oussorene72e1612011-03-14 18:15:25 -04002462# This code tests an OSX specific data structure but is testable on all
2463# platforms
2464def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2465 """
2466 Return True iff this host shouldn't be accessed using a proxy
2467
2468 This function uses the MacOSX framework SystemConfiguration
2469 to fetch the proxy information.
2470
2471 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2472 { 'exclude_simple': bool,
2473 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2474 }
2475 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002476 from fnmatch import fnmatch
2477
2478 hostonly, port = splitport(host)
2479
2480 def ip2num(ipAddr):
2481 parts = ipAddr.split('.')
2482 parts = list(map(int, parts))
2483 if len(parts) != 4:
2484 parts = (parts + [0, 0, 0, 0])[:4]
2485 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2486
2487 # Check for simple host names:
2488 if '.' not in host:
2489 if proxy_settings['exclude_simple']:
2490 return True
2491
2492 hostIP = None
2493
2494 for value in proxy_settings.get('exceptions', ()):
2495 # Items in the list are strings like these: *.local, 169.254/16
2496 if not value: continue
2497
2498 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2499 if m is not None:
2500 if hostIP is None:
2501 try:
2502 hostIP = socket.gethostbyname(hostonly)
2503 hostIP = ip2num(hostIP)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002504 except OSError:
Ronald Oussorene72e1612011-03-14 18:15:25 -04002505 continue
2506
2507 base = ip2num(m.group(1))
2508 mask = m.group(2)
2509 if mask is None:
2510 mask = 8 * (m.group(1).count('.') + 1)
2511 else:
2512 mask = int(mask[1:])
2513 mask = 32 - mask
2514
2515 if (hostIP >> mask) == (base >> mask):
2516 return True
2517
2518 elif fnmatch(host, value):
2519 return True
2520
2521 return False
2522
2523
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Test host against the settings from _get_proxy_settings()."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return the environment proxies, or else the system ones."""
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with block closes the registry key even when one of the
            # queries below raises.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # Whatever was parsed before the error is returned as is.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test host against the ProxyOverride list in the registry.

        host may carry a ':port' suffix.  The bare host name, its
        resolved address and its fully qualified name are each compared,
        ignoring case, with every ';'-separated entry; an entry has to
        match the whole name and may use shell-style wildcards.  The
        entry '<local>' matches host names without a dot.  Returns 1 to
        bypass the proxy, else 0.
        """
        from fnmatch import translate
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            # The with block closes the registry key again on every path.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        candidates = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                candidates.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                candidates.append(fqdn)
        except OSError:
            pass
        # now check if we match one of the registry values.
        for test in proxyOverride.split(';'):
            test = test.strip()
            if not test:
                # An empty entry (e.g. after a trailing ';') must not
                # match: as a pattern it would accept every host.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
                continue
            # translate() escapes regex metacharacters and anchors the
            # pattern at both ends, so 'example.com' no longer matches
            # 'example.com.other.net'.
            matches = re.compile(translate(test), re.I).match
            for val in candidates:
                if matches(val):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
2679 proxy_bypass = proxy_bypass_environment