blob: 376bba40791f0a172031f2920910e942c1b45911 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020021OSError); for HTTP errors, raises an HTTPError, which can also be
Jeremy Hylton1afc1692008-06-18 20:49:58 +000022treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
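BaseHandler -- Base class for handlers; records the parent opener via
add_parent() and orders handlers by their handler_order attribute.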
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# what the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that information
# along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100106 splitattr, splitquery, splitvalue, splittag, to_bytes,
107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# Check for SSL once at import time.  ``_have_ssl`` is consulted later
# (e.g. by urlopen) before any name from the ssl module is touched.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000117
# Names exported by ``from urllib.request import *``.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
    'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
133
# Used in the User-Agent header sent.
# Built from sys.version_info rather than slicing sys.version: the slice
# ``sys.version[:3]`` truncates two-digit minor versions ("3.10" -> "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
136
# Module-wide default opener; created lazily by urlopen() or set explicitly
# through install_opener().
_opener = None


def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* and return whatever the selected opener's open() returns.

    url      -- a URL string or a Request object.
    data     -- optional data forwarded to the opener.
    timeout  -- timeout forwarded to the opener.
    cafile, capath, cadefault -- if any is true, a dedicated opener is built
                around an HTTPSHandler whose SSL context is created from
                cafile/capath.
    context  -- if given, a dedicated opener is built around an HTTPSHandler
                using this context.

    When none of the SSL-related arguments is supplied, the module-wide
    opener is used (and created with build_opener() on first use).

    Raises ValueError if *context* is combined with cafile/capath/cadefault,
    or if SSL parameters are requested while the ssl module is unavailable.
    """
    global _opener
    if cafile or capath or cadefault:
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: build the shared default opener and remember it.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000162
def install_opener(opener):
    """Install *opener* as the module-wide default opener used by urlopen()."""
    global _opener
    _opener = opener
166
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []


def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the destination; otherwise a temporary file is created and its
    path is remembered in _url_tempfiles. The reporthook argument should
    be a callable that accepts a block number, a read size, and the
    total file size of the URL target (-1 when no Content-Length header
    is present). The data argument is passed through to urlopen().

    If no filename is passed and the URL is a file:// URL, nothing is
    copied: the normalized local path is returned directly.

    Returns a tuple containing the path to the data file as well as the
    headers object returned by the response's info() method.

    Raises ContentTooShortError if fewer bytes were read than the
    Content-Length header announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial callback before any data has been transferred.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000230
def urlcleanup():
    """Clean up temporary files from urlretrieve calls.

    Also drops the module-wide default opener so that the next urlopen()
    call builds a fresh one.
    """
    global _opener
    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except OSError:
            # Best effort: the file may already be gone.
            pass

    del _url_tempfiles[:]
    if _opener:
        _opener = None
243
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of request.full_url;
    if that is empty, the request's "Host" header is used instead (or ""
    when there is none).  A trailing ":port" is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.full_url
    host = urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
261
class Request:
    """Encapsulate the state of a single URL request.

    Holds the URL (split into type, host, selector and fragment), optional
    request data, and two sets of headers: regular ones and "unredirected"
    ones that are not copied onto a redirected request.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Initialize the request.

        url             -- URL string; must include a scheme (ValueError
                           otherwise).
        data            -- optional request data.
        headers         -- optional mapping of header name to value; each
                           entry is added via add_header().
        origin_req_host -- defaults to request_host(self).
        unverifiable    -- stored as-is on the instance.
        method          -- optional explicit HTTP method; when omitted,
                           get_method() derives it from *data*.
        """
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        # ``None`` replaces the former shared mutable default ``{}``.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        # Only set the attribute when given, so get_method() can fall back.
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL, with the fragment re-attached when there is one."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request data (None when there is none)."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split the stored URL into self.type, self.host and self.selector."""
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Return the full URL (same value as the full_url property)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Redirect this request through the proxy at *host*.

        For an https request that is not yet tunnelled, the original host is
        remembered in _tunnel_host; otherwise the request type becomes *type*
        and the selector becomes the full URL.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is present in either header dict.

        The name is looked up exactly as given (no capitalization is applied
        here, unlike in add_header()).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring regular over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header dicts, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return a list of (name, value) pairs; regular headers win ties."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
371
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are indexed by the names of their methods: ``<proto>_open``,
    ``<proto>_request``, ``<proto>_response`` and ``<proto>_error[_<kind>]``.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every protocol/condition it implements.

        Raises TypeError if the object has no add_parent attribute.  A
        handler none of whose method names match a known pattern is ignored.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Method names have the shape "<protocol>_<condition>".
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is an int when numeric
                # (e.g. an HTTP status code), otherwise left as a string.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted (handlers compare via __lt__).
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request passes through the ``<protocol>_request`` processors,
        then the open chain, then the ``<protocol>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific, then 'unknown' open chains."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching handler chain.

        For 'http'/'https' the chain is keyed by the third positional
        argument (the status code) and falls back to http_error_default;
        for other protocols the chain is ``<proto>_error``.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = False
        result = self._call_chain(lookup, proto, meth_name, *args)
        if result:
            return result

        if http_err:
            return self._call_chain(lookup, 'default',
                                    'http_error_default', *args)
509
510# XXX probably also want an abstract factory that knows when it makes
511# sense to skip a superclass in favor of a subclass and when it might
512# make sense to include both
513
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    Each argument may be a handler instance or a handler class (which is
    instantiated without arguments).  If any of the handlers passed as
    arguments are subclasses (or instances of subclasses) of a default
    handler, that default handler will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Collect the defaults that a user-supplied handler overrides.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, type):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)

    # Install the remaining defaults first, in their listed order ...
    for klass in default_classes:
        if klass not in skip:
            opener.add_handler(klass())

    # ... then the caller's handlers, instantiating classes on the fly.
    for h in handlers:
        if isinstance(h, type):
            h = h()
        opener.add_handler(h)
    return opener
549
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Handlers are sorted by this value; lower values come first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the opener this handler has been registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order
567
568
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes; otherwise hand it to
        the parent opener's error() machinery and return that result."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
585
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback error handler: turn any HTTP error into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000589
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no data, so content headers are dropped.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the redirect target.

        Returns None when the response carries no location/uri header or
        when redirect_request() declines.  Raises HTTPError for disallowed
        target schemes and for detected redirect loops.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
692
693
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme.

    For a bare authority the returned scheme is None; user and password are
    None when no userinfo is present.  Raises ValueError for a URL whose
    remainder starts with a single slash (no authority).
    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport
722
class ProxyHandler(BaseHandler):
    """Route requests through proxies configured per URL scheme."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Create ``<scheme>_open`` methods for every entry of *proxies*.

        proxies -- mapping of scheme to proxy URL; defaults to getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Default arguments bind the current loop values; a plain closure
            # would see only the last scheme/url pair.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*.

        Returns None to let other handlers continue (bypassed host, same
        scheme, or https), otherwise re-opens the request via the parent.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000764
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* at *uri*.

        *uri* may be a single URI string or a sequence of URIs.  Each URI
        is stored twice: once with the scheme's default port filled in and
        once exactly as given.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for *realm* covering *authuri*.

        Returns (None, None) when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A plain character-wise common
        # prefix would treat "/foobar" as being below "/foo" and hand the
        # credentials for one resource to an unrelated sibling.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
827
828
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` realm."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* under *realm*, then under the None realm."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # Nothing stored for this realm: try the catch-all entry.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
837
838
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to re-issue
    the request).
    """

    # Matches one challenge of the form ``<scheme> realm=<value>``.
    # The match is anchored at the start of the header or at a comma; the
    # previous leading "(?:.*,)*" backtracked exponentially on headers
    # made of many commas and always selected the *last* challenge.
    #
    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    # realm=xxx, realm='xxx' or realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr*, or a fresh HTTPPasswordMgr when it is None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in header *authreq* of *headers*.

        Returns the response of the retried request, or None when there is
        no usable Basic challenge.  Raises ValueError when the header
        starts with a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with the credentials stored for *realm* and *host*.

        Returns None when no password is known, or when the same
        credentials are already present on the request.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                # These credentials were already sent and rejected.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
892
893
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials for the requested URL."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* using the 'www-authenticate' challenge, if any."""
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000903
904
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* using the 'proxy-authenticate' challenge, if any."""
        # http_error_auth_reqed requires that there is no userinfo component
        # in authority.  Assume there isn't one, since urllib.request does
        # not (and should not, RFC 3986 s. 3.2.1) support requests for URLs
        # containing userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000918
919
# Return n random bytes.
_randombytes = os.urandom


class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to re-issue
    the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0        # challenges answered since the last reset
        self.nonce_count = 0    # requests sent with last_nonce
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget how many challenges have been answered so far."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in *auth_header* of *headers*.

        Returns the response of the retried request, or None when there is
        no Digest challenge to answer.  Raises HTTPError after more than
        five attempts and ValueError for a scheme that is neither Digest
        nor Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with an answer to the challenge string *auth*.

        Returns None when no answer can be built or when the same answer
        is already present on the request.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hexadecimal client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value of the Digest credentials for *req*.

        *chal* is the parsed challenge (a dict of directive -> value).
        Returns None when the challenge lacks a realm or nonce, names an
        unsupported algorithm, or no user is known for the realm.  Raises
        URLError for a qop other than 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # The nonce count restarts whenever the server issues a new nonce.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        Returns (None, None) for an algorithm other than 'MD5' or 'SHA',
        which get_authorization() treats as "cannot answer".  Previously
        H was left unbound in that case and the method raised
        UnboundLocalError.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
1063
1064
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 'www-authenticate' challenge, then reset the retry count."""
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
1081
1082
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest credentials for the proxy."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 'proxy-authenticate' challenge, then reset the retry count."""
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1094
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and connection handling for HTTP(S)."""

    def __init__(self, debuglevel=0):
        """Remember *debuglevel* for the connections opened by do_open()."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Change the debug level used for subsequently opened connections."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers *request* needs before it is sent.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the opener's default headers, each only when
        not already present.  Returns *request*.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        # Imported here because collections.Iterable (used previously) no
        # longer exists; the ABCs live in collections.abc.
        from collections.abc import Iterable

        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # Not a buffer: the length cannot be computed here.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # The selector is then a full URL; Host must name its host.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Extra keyword arguments are passed to the http_class constructor.
        Raises URLError when the request has no host or sending it fails
        with an OSError.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level given to __init__/set_http_debuglevel; it was
        # stored but never passed on to the connection before.
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over regular ones.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Close the connection whatever went wrong, then propagate.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with the reason phrase, because urllib clients expect the
        # response to have the reason in .msg.  It would be good to mark
        # this attribute as deprecated and get them to use info() or
        # .headers for the headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001206
1207
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs through http.client.HTTPConnection."""

    # Request pre-processing is the generic one of the base class.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* over a plain HTTP connection and return the response."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1214
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs through http.client.HTTPSConnection."""

        # Request pre-processing is the generic one of the base class.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Store the options forwarded to every HTTPSConnection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* over an HTTPS connection and return the response."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001231
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Add the jar's cookie header to *request* and return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Record the cookies of *response* in the jar and return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1249
class UnknownHandler(BaseHandler):
    """Reject requests whose URL scheme is not otherwise handled."""

    def unknown_open(self, req):
        """Always raise URLError naming the scheme of *req*."""
        scheme = req.type
        raise URLError('unknown url type: %s' % scheme)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001254
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An element without
    '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes so an empty value ("opaque=") is
        # kept as '' instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1264
def parse_http_list(s):
    """Split a comma-separated list as described by RFC 2068 Section 2.

    Elements may be quoted-strings, and a quoted-string may contain a
    comma.  An unquoted element may have quotes in the middle.  Inside a
    quoted-string a backslash escapes the next character (the backslash
    itself is dropped).  Only double-quotes count, not single-quotes.
    Each returned element is stripped of surrounding whitespace.
    """
    items = []
    buf = []            # characters of the element being read
    escaped = False     # previous character was a backslash in quotes
    in_quotes = False   # currently inside a quoted-string

    for ch in s:
        if escaped:
            buf.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                buf.append(ch)
        elif ch == ',':
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quotes = True
            buf.append(ch)

    # append last part
    if buf:
        items.append(''.join(buf))

    return [item.strip() for item in items]
1307
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local host."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* as a local file.

        A URL naming a host other than 'localhost' raises URLError unless
        that host is one of get_names(); in that latter case nothing is
        opened and None is returned.
        """
        url = req.selector
        names_other_host = (url[:2] == '//' and url[2:3] != '/' and
                            (req.host and req.host != 'localhost'))
        if not names_other_host:
            return self.open_local_file(req)
        if req.host not in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")

    # names for the localhost
    names = None

    def get_names(self):
        """Return the addresses of this host, computed once per process."""
        if FileHandler.names is None:
            try:
                addresses = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                addresses = (socket.gethostbyname('localhost'),)
            FileHandler.names = addresses
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file named by the selector of *req*.

        Raises URLError when the file cannot be accessed or when the URL
        names a host that is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            header_text = (
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', stats.st_size, modified))
            headers = email.message_from_string(header_text)
            if host:
                host, port = splitport(host)
            is_local = not host or (
                not port and _safe_gethostbyname(host) in self.get_names())
            if is_local:
                origurl = 'file://' + (host + filename if host else filename)
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001359
def _safe_gethostbyname(host):
    """Return socket.gethostbyname(host), or None if resolution fails."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        address = None
    return address
1365
class FTPHandler(BaseHandler):
    """Open ftp: URLs using a fresh connection per request."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by *req* over FTP.

        Returns an addinfourl wrapping the data.  Raises URLError when no
        host is given, the host cannot be resolved, or the FTP exchange
        fails.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, file = segments[:-1], segments[-1]
        if dirs and not dirs[0]:
            # Drop the empty component produced by the leading '/'.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise,
            # unless a ";type=" attribute says differently.
            type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(exp.__traceback__)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001423
class CacheFTPHandler(FTPHandler):
    """FTP handler that keeps connections open and reuses them."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}       # key -> ftpwrapper
        self.timeout = {}     # key -> time after which the entry expires
        self.soonest = 0      # earliest expiry time in self.timeout
        self.delay = 60       # seconds an entry stays valid after use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how long, in seconds, an unused connection is kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the number of cached connections that triggers eviction."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for this endpoint, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        # Using a connection (new or cached) renews its lease.
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and evict one when the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            self.soonest = self._earliest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close before dropping the reference, as is done for
                    # expired entries, so the connection is not leaked.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._earliest_expiry()

    def _earliest_expiry(self):
        """Return the smallest expiry time, or 0 when nothing is cached."""
        # min() of an empty sequence raises ValueError; 0 is the value
        # self.soonest starts with.
        return min(self.timeout.values()) if self.timeout else 0

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1476
class DataHandler(BaseHandler):
    """Open data: URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the payload embedded in the URL of *req*.

        Any POSTed data is ignored.  Returns an addinfourl whose headers
        carry the media type and the payload length.
        """
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        base64_marker = ";base64"
        url = req.full_url

        _scheme, remainder = url.split(":", 1)
        mediatype, encoded = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(encoded)
        if mediatype.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(base64_marker)]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"

        header_text = ("Content-type: %s\nContent-length: %d\n" %
                       (mediatype, len(payload)))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, url)
1505 return addinfourl(io.BytesIO(data), headers, url)
1506
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001507
# Code moved from the old urllib module
1509
1510MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
1511
1512# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL to a file system path.

        The conversion is OS-specific (here: percent-decoding only) and
        not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to the path part of a 'file' URL.

        The conversion is OS-specific (here: percent-encoding only) and
        not recommended for general use."""
        return quote(pathname)
else:
    # Windows paths need drive-letter and separator handling.
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001525
1526# This really consists of two pieces:
1527# (1) a class which handles opening of all sorts of URLs
1528# (plus assorted utilities etc.)
1529# (2) a set of functions for parsing URLs
1530# XXX Should these be separated out into different modules?
1531
1532
1533ftpcache = {}
class URLopener:
    """Class to open URLs.

    This is a class rather than a plain subroutine because more than one
    set of global protocol-specific options may be needed.

    Note -- this is the base class for callers who do not want the
    automatic handling of errors 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default; cleanup() treats a falsy value as "nothing to
    # remove".
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and default headers.

        proxies -- mapping of URL scheme to proxy URL; when None the
                   result of getproxies() is used.
        x509    -- optional 'key_file' and 'cert_file' keyword arguments
                   passed on to HTTPS connections.

        Emits a DeprecationWarning on every instantiation.
        """
        warnings.warn(
            "%(class)s style of invoking requests is deprecated. "
            "Use newer urlopen functions/methods"
            % {'class': self.__class__.__name__},
            DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        # Bound here because cleanup() must not rely on module globals.
        self.__unlink = os.unlink
        # Undocumented feature: if you assign {} to tempcache, it is used
        # to cache files retrieved with self.retrieve().  This is not
        # enabled by default since it does not work for changing documents
        # (and there is no logic to check expiration headers).
        self.tempcache = None
        # Undocumented feature: you can use a different ftp cache by
        # assigning to the .ftpcache member, in case you want logically
        # independent URL openers.
        # XXX This is not threadsafe.
        self.ftpcache = ftpcache

    def __del__(self):
        self.close()

    def close(self):
        """Release resources held by this opener (delegates to cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files made by retrieve() and empty tempcache."""
        # This code sometimes runs when the rest of this module has already
        # been deleted, so it can't use any globals or import anything.
        if self.__tempfiles:
            for path in self.__tempfiles:
                try:
                    self.__unlink(path)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            self.__tempfiles.clear()
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named open_<scheme> (with '-' replaced
        by '_').  When a proxy is configured for the scheme, the proxy's
        scheme selects the method and the URL is passed as a
        (proxy host, full URL) pair.  HTTPError and URLError propagate
        unchanged; any other OSError is re-raised as
        OSError('socket error', original).
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            cached_name, cached_headers = self.tempcache[fullurl]
            return addinfourl(open(cached_name, 'rb'), cached_headers,
                              fullurl)

        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        proxy = None
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, _selector = splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()

        handler_name = 'open_' + urltype
        self.type = urltype
        handler_name = handler_name.replace('-', '_')
        if not hasattr(self, handler_name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            return self.open_unknown(fullurl, data)

        try:
            if data is None:
                return getattr(self, handler_name)(url)
            return getattr(self, handler_name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as err:
            raise OSError('socket error', err).with_traceback(
                err.__traceback__)

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        scheme, _rest = splittype(fullurl)
        raise OSError('url error', 'unknown url type', scheme)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        scheme, _rest = splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % scheme, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, when given, is called as
        reporthook(block_number, block_size, total_size) once before the
        first read and once after every block written; total_size is -1
        when no Content-Length header is present.

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]

        scheme, remainder = splittype(url)
        if filename is None and (not scheme or scheme == 'file'):
            # Local object: no copy is made, just report where it lives.
            try:
                local = self.open_local_file(remainder)
                local_headers = local.info()
                local.close()
                return url2pathname(splithost(remainder)[1]), local_headers
            except OSError:
                # Not openable as a local file; fall through to open().
                pass

        source = self.open(url, data)
        try:
            headers = source.info()
            if filename:
                target = open(filename, 'wb')
            else:
                import tempfile
                # Reduce the URL to its path to pick a matching suffix.
                _ignored, path = splittype(url)
                _ignored, path = splithost(path or "")
                path, _ignored = splitquery(path or "")
                path, _ignored = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                fd, filename = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                target = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                block_size = 1024 * 8
                size = -1
                read = 0
                block_count = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(block_count, block_size, size)
                while True:
                    chunk = source.read(block_size)
                    if not chunk:
                        break
                    read += len(chunk)
                    target.write(chunk)
                    block_count += 1
                    if reporthook:
                        reporthook(block_count, block_size, size)
            finally:
                target.close()
        finally:
            source.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a (host, full URL) pair; the pair
          form is what open() passes when a proxy is configured.
        - data is payload for a POST request or None.

        Returns an addinfourl for 2xx responses; every other status is
        handed to http_error().
        """

        def encode_credentials(raw):
            # "user:password" -> base64 text for a Basic auth header.
            return base64.b64encode(unquote(raw).encode()).decode('ascii')

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() == 'http':
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the credentials.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost
            else:
                realhost = None

        if not host:
            raise OSError('http error', 'no host given')

        proxy_auth = encode_credentials(proxy_passwd) if proxy_passwd else None
        auth = encode_credentials(user_passwd) if user_passwd else None

        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections
        # yet.  This helps in closing the socket and avoiding
        # ResourceWarning.
        headers["Connection"] = "close"

        # Caller-supplied headers are applied last, so they override the
        # defaults set above.
        for name, value in self.addheaders:
            headers[name] = value

        if data is None:
            http_conn.request("GET", selector, headers=headers)
        else:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= response.status < 300):
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
        return addinfourl(response, response.msg, "http:" + url,
                          response.status)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.  A
        specific handler's truthy result is returned; otherwise
        http_error_default() is called."""
        # First check if there's a specific handler for this error
        handler_name = 'http_error_%d' % errcode
        if hasattr(self, handler_name):
            handler = getattr(self, handler_name)
            if data is None:
                outcome = handler(url, fp, errcode, errmsg, headers)
            else:
                outcome = handler(url, fp, errcode, errmsg, headers, data)
            if outcome:
                return outcome
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Build an HTTPSConnection using this opener's key/cert files."""
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL.

        Raises URLError for the proxy (non-string) form and ValueError
        when the URL names a host other than localhost."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        names_remote_host = (url[:2] == '//' and url[2:3] != '/'
                             and url[2:12].lower() != 'localhost/')
        if names_remote_host:
            raise ValueError("file:// scheme is supported only on localhost")
        return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl wrapping the file opened in binary mode,
        with Content-Type, Content-Length and Last-modified headers
        built from os.stat() and mimetypes.  Raises URLError when the
        file cannot be stat'ed or the host is not this machine."""
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', stats.st_size, modified))
        if not host:
            urlfile = 'file://' + file if file[:1] == '/' else file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        is_this_machine = (
            not port
            and socket.gethostbyname(host) in ((localhost(),) + thishost()))
        if is_this_machine:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by
        (user, host, port, directory).  Errors from the FTP layer are
        re-raised as URLError."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if port:
            port = int(port)
        else:
            import ftplib
            port = ftplib.FTP_PORT
        path, attrs = splitattr(path)
        path = unquote(path)
        parts = path.split('/')
        dirs, file = parts[:-1], parts[-1]
        # Drop the empty component produced by a leading '/'; a second
        # empty component (path began with '//') becomes '/'.
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        if dirs and not dirs[0]:
            dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily: everything except the
            # entry about to be used is closed and dropped.
            for stale in list(self.ftpcache):
                if stale != key:
                    self.ftpcache.pop(stale).close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = ftpwrapper(user, passwd, host, port,
                                                dirs)
            # Default transfer type: directory listing when no file name
            # was given, otherwise binary ("image").
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if (attr.lower() == 'type'
                        and value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = value.upper()
            fp, retrlen = self.ftpcache[key].retrfile(file, transfer_type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            header_text = ""
            if mtype:
                header_text += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                header_text += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(header_text)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(
                exp.__traceback__)

    def open_data(self, url, data=None):
        """Use "data" URL.

        POSTed data is ignored.  The returned file object contains the
        generated header block followed by the decoded payload.

        Syntax of data URLs:
            dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            mediatype := [ type "/" subtype ] *( ";" parameter )
            data      := *urlchar
            parameter := attribute "=" value
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        try:
            mediatype, payload = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";token" with no "=" is an encoding marker rather
        # than a media type parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        lines = [
            'Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                       time.gmtime(time.time())),
            'Content-type: %s' % mediatype,
        ]
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            payload = base64.decodebytes(
                payload.encode('ascii')).decode('latin-1')
        else:
            payload = unquote(payload)
        lines.append('Content-Length: %d' % len(payload))
        lines.append('')
        lines.append(payload)
        text = '\n'.join(lines)
        headers = email.message_from_string(text)
        return addinfourl(io.StringIO(text), headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001978
1979
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Initialise the base opener plus redirect/auth bookkeeping."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}   # "realm@host" -> (user, passwd)
        self.tries = 0         # redirects followed in the current chain
        self.maxtries = 10     # redirect limit; a falsy value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Once maxtries redirects have been followed, the response is
        reported as a 500 "Redirect Recursion" error instead."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            handler = getattr(self, "http_error_500",
                              self.http_error_default)
            self.tries = 0
            return handler(url, fp, 500,
                           "Internal Server Error: Redirect Recursion",
                           headers)
        outcome = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                         data)
        self.tries = 0
        return outcome

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the redirect target named by the response headers.

        Returns None when neither a 'location' nor a 'uri' header is
        present.  Raises HTTPError when the target scheme is not http,
        https, ftp or empty."""
        for header in ('location', 'uri'):
            if header in headers:
                newurl = headers[header]
                break
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3
        if urlparse(newurl).scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def _answer_basic_challenge(self, challenge_header, retry_prefix, url,
                                fp, errcode, errmsg, headers, data, retry):
        """Shared body of http_error_401() and http_error_407().

        Falls back to URLopener.http_error_default() unless the
        challenge header is present, parses as '<scheme> realm="..."',
        uses the Basic scheme, and retry is true.  Otherwise dispatches
        to the method named <retry_prefix><self.type>_basic_auth."""
        def refuse():
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)

        if challenge_header not in headers:
            refuse()
        challenge = headers[challenge_header]
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            refuse()
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            refuse()
        if not retry:
            refuse()
        retry_method = getattr(self,
                               retry_prefix + self.type + '_basic_auth')
        if data is None:
            return retry_method(url, realm)
        return retry_method(url, realm, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._answer_basic_challenge(
            'www-authenticate', 'retry_', url, fp, errcode, errmsg,
            headers, data, retry)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._answer_basic_challenge(
            'proxy-authenticate', 'retry_proxy_', url, fp, errcode, errmsg,
            headers, data, retry)

    def _reopen(self, newurl, data):
        """Call open(), passing data only when it is not None."""
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Store credentials in the proxy URL for scheme, then re-open url.

        Returns None when no user name or password is available."""
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        _urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # A non-zero offset means the proxy URL already carried
        # credentials; passing it on makes get_user_passwd() drop its
        # cached entry.
        offset = proxyhost.find('@') + 1
        proxyhost = proxyhost[offset:]
        user, passwd = self.get_user_passwd(proxyhost, realm, offset)
        if not (user or passwd):
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        return self._reopen(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Re-open url with user:password@ credentials in the host part.

        Returns None when no user name or password is available."""
        host, selector = splithost(url)
        # A non-zero offset means the URL already carried credentials;
        # passing it on makes get_user_passwd() drop its cached entry.
        offset = host.find('@') + 1
        host = host[offset:]
        user, passwd = self.get_user_passwd(host, realm, offset)
        if not (user or passwd):
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        return self._reopen(scheme + '://' + host + selector, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407, adding proxy credentials."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407, adding proxy credentials."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401, adding credentials."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401, adding credentials."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Results are cached under "realm@host" (host lower-cased).  A
        true clear_cache discards any cached entry first."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[key]
            del self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Reads a user name and password from the terminal; returns
        (None, None) when interrupted with Ctrl-C."""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
2188
2189
2190# Utility functions
2191
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the result is kept in a module global."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2199
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The result is a tuple, looked up once and kept in a module global.
    When the host's own name cannot be resolved, the addresses of
    'localhost' are used instead."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2210
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on first use and its all_errors value is
    cached in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
2219
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The message is built once by parsing an empty string and the same
    instance is returned on every later call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2227
2228
2229# Utility classes
2230
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    The wrapper keeps a reference count of the file objects handed out by
    retrfile().  The underlying FTP connection is really closed only once
    close() has been requested (or the wrapper is not persistent) and no
    handed-out file object is still open.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and log in via init().

        If connecting, logging in or changing directory fails, the
        wrapper is closed and the original exception is re-raised.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Cleanup only: release the connection, then let the original
            # exception (whatever it is) propagate unchanged.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change to the directory in self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* and return (file object, length).

        *type* is the FTP transfer type; 'd' or 'D' requests a directory
        listing.  When *file* cannot be retrieved because of a 550 reply,
        a directory listing of *file* is attempted instead.  The second
        item of the result is the length reported by ntransfercmd().

        Raises URLError (chained to the ftplib error) for permanent FTP
        errors other than the 550 handled by the listing fallback.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection is unusable; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy."""
        self.busy = 0

    def close(self):
        """Request closing; closes at once if no file object is open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook of a file object returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP errors while closing."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2324
2325# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (the name is
    matched case-insensitively); this seems to be the standard
    convention.  If you need a different way, you can pass a proxies
    dictionary to the [Fancy]URLopener constructor.

    Variables whose name ends in lowercase '_proxy' take precedence over
    other spellings of the same scheme, and an empty lowercase variable
    removes the scheme altogether.  When REQUEST_METHOD is set (i.e. the
    process looks like a CGI script) a non-lowercase HTTP_PROXY is
    ignored, because a CGI server derives it from the client supplied
    "Proxy:" header (CVE-2016-1000110).
    """
    proxies = {}
    lowercase_entries = []
    # First pass: accept every spelling, remembering the lowercase ones.
    for name, value in os.environ.items():
        folded = name.lower()
        if folded[-6:] != '_proxy':
            continue
        scheme = folded[:-6]
        if value:
            proxies[scheme] = value
        if name[-6:] == '_proxy':
            lowercase_entries.append((scheme, value))
    # Running as CGI: forget HTTP_PROXY, it may be attacker controlled.
    # A lowercase http_proxy is restored by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase variables override whatever was found above.
    for scheme, value in lowercase_entries:
        if value:
            proxies[scheme] = value
        else:
            proxies.pop(scheme, None)
    return proxies
2341
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (falling back to
    NO_PROXY), which should be a list of DNS suffixes separated by
    commas, or '*' for all hosts.

    Matching is case-insensitive.  An entry matches when it equals the
    host (with or without its port) or when the host ends with '.' plus
    the entry, so 'example.com' covers 'www.example.com' but not
    'evilexample.com'.  Leading dots in an entry are ignored.

    Returns 1 if the proxy should be bypassed for *host*, otherwise 0.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host matches any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        if hostonly == name or host == name:
            return 1
        # Only match on a label boundary, never in the middle of a name.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
2361
2362
Ronald Oussorene72e1612011-03-14 18:15:25 -04002363# This code tests an OSX specific data structure but is testable on all
2364# platforms
2365def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2366 """
2367 Return True iff this host shouldn't be accessed using a proxy
2368
2369 This function uses the MacOSX framework SystemConfiguration
2370 to fetch the proxy information.
2371
2372 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2373 { 'exclude_simple': bool,
2374 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2375 }
2376 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002377 from fnmatch import fnmatch
2378
2379 hostonly, port = splitport(host)
2380
2381 def ip2num(ipAddr):
2382 parts = ipAddr.split('.')
2383 parts = list(map(int, parts))
2384 if len(parts) != 4:
2385 parts = (parts + [0, 0, 0, 0])[:4]
2386 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2387
2388 # Check for simple host names:
2389 if '.' not in host:
2390 if proxy_settings['exclude_simple']:
2391 return True
2392
2393 hostIP = None
2394
2395 for value in proxy_settings.get('exceptions', ()):
2396 # Items in the list are strings like these: *.local, 169.254/16
2397 if not value: continue
2398
2399 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2400 if m is not None:
2401 if hostIP is None:
2402 try:
2403 hostIP = socket.gethostbyname(hostonly)
2404 hostIP = ip2num(hostIP)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002405 except OSError:
Ronald Oussorene72e1612011-03-14 18:15:25 -04002406 continue
2407
2408 base = ip2num(m.group(1))
2409 mask = m.group(2)
2410 if mask is None:
2411 mask = 8 * (m.group(1).count('.') + 1)
2412 else:
2413 mask = int(mask[1:])
2414 mask = 32 - mask
2415
2416 if (hostIP >> mask) == (base >> mask):
2417 return True
2418
2419 elif fnmatch(host, value):
2420 return True
2421
2422 return False
2423
2424
# Select the platform specific implementations of getproxies() and
# proxy_bypass().  On every platform the environment variables win when
# they define at least one proxy.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Test *host* against the proxy exceptions of the system."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment when it defines any proxy, otherwise the
        system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key handle even when a value is missing
                # or malformed.
                internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test *host* against the ProxyOverride list in the registry.

        Returns 1 if the proxy should be bypassed, otherwise 0.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
                # ^^^^ Returned as Unicode but problems if not converted to ASCII
            finally:
                # Do not leak the registry handle.
                internetSettings.Close()
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            # NOTE(review): re.match() anchors only at the start, so an
            # entry also matches any longer name it is a prefix of.
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment when it defines any proxy, otherwise the
        registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment