blob: ef62acc71028c1129565b665005744b987aaef99 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib: pass the URL and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of a default
handler, the argument will be installed instead of that default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
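BaseHandler -- Base class for handlers registered with an OpenerDirector;
its handler_order attribute determines the order in which handlers run.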
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled? The
# client needs to know the HTTP error code. But if the handler knows
# that the problem was, e.g., that it didn't know the hash algo that
# was requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
109# check for SSL
110try:
111 import ssl
Senthil Kumaranc2958622010-11-22 04:48:26 +0000112except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 _have_ssl = False
114else:
115 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800117__all__ = [
118 # Classes
119 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
120 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
121 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
122 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
123 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
124 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
125 'UnknownHandler', 'HTTPErrorProcessor',
126 # Functions
127 'urlopen', 'install_opener', 'build_opener',
128 'pathname2url', 'url2pathname', 'getproxies',
129 # Legacy interface
130 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
131]
132
# used in User-Agent header sent
# Built from sys.version_info instead of slicing sys.version: the slice
# sys.version[:3] truncates two-digit minor versions ("3.10" -> "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
135
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the selected opener's open() returns.

    Without any of the keyword-only CA arguments the module-wide opener
    is used; it is built on first use and cached in ``_opener``.

    When *cafile*, *capath* or *cadefault* is truthy, a one-off opener
    with a certificate-verifying HTTPS handler is built for this call
    only and the cached opener is left untouched.

    Raises ValueError if CA options are given but the ssl module could
    not be imported.
    """
    global _opener

    # Common case first: no custom certificate handling requested.
    if not (cafile or capath or cadefault):
        if _opener is None:
            _opener = build_opener()
        return _opener.open(url, data, timeout)

    if not _have_ssl:
        raise ValueError('SSL support not available')

    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.options |= ssl.OP_NO_SSLv2
    context.verify_mode = ssl.CERT_REQUIRED
    if cafile or capath:
        context.load_verify_locations(cafile, capath)
    else:
        # only cadefault was requested
        context.set_default_verify_paths()

    verifying_handler = HTTPSHandler(context=context, check_hostname=True)
    return build_opener(verifying_handler).open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000157
def install_opener(opener):
    """Store *opener* in the module global ``_opener``.

    The object is stored as given; nothing about it is validated here.
    """
    global _opener
    _opener = opener
161
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download *url* to a file on disk and return ``(path, headers)``.

    If *filename* is given the data is written there; otherwise a named
    temporary file is created and its path is remembered in
    ``_url_tempfiles``.  For a ``file`` URL with no *filename* nothing
    is copied: the normalised local path is returned together with the
    headers.

    *reporthook*, if given, is called as
    ``reporthook(block_number, block_size, total_size)`` once before the
    first read and once after every block written; ``total_size`` is -1
    when the response carries no Content-Length header.  *data* is
    passed straight through to urlopen().

    Raises ContentTooShortError when fewer bytes were read than the
    Content-Length header announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as response:
        headers = response.info()

        # A local file with no explicit destination needs no copy.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Decide where the payload goes.
        if filename:
            sink = open(filename, 'wb')
        else:
            sink = tempfile.NamedTemporaryFile(delete=False)
            filename = sink.name
            _url_tempfiles.append(filename)

        with sink:
            result = filename, headers
            bs = 1024*8
            size = -1
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            read = 0
            blocknum = 0

            if reporthook:
                reporthook(blocknum, bs, size)

            chunk = response.read(bs)
            while chunk:
                read += len(chunk)
                sink.write(chunk)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
                chunk = response.read(bs)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000225
def urlcleanup():
    """Remove the files listed in ``_url_tempfiles`` and reset ``_opener``.

    Deletion is best-effort: a file that cannot be unlinked (for example
    because it is already gone) is silently skipped.  The list is
    emptied in place afterwards, and a truthy cached opener is replaced
    by None.
    """
    global _opener

    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # best-effort cleanup; ignore files we cannot remove
            pass
    _url_tempfiles.clear()

    if _opener:
        _opener = None
237
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is the network-location part of ``request.full_url``; when
    that is empty the request's "Host" header is used instead.  A
    trailing ``:port`` is stripped.

    Variation from the RFC: the result is lowercased so that callers can
    compare hosts directly.
    """
    netloc = urlparse(request.full_url).netloc
    if netloc == "":
        netloc = request.get_header("Host", "")

    # drop the port, if any, then normalise case
    return _cut_port_re.sub("", netloc, 1).lower()
255
class Request:
    """Encapsulate the state of a single URL request.

    The constructor removes an optional ``<URL:...>`` wrapper and the
    ``#fragment`` from *url*, then splits what is left into ``type``
    (the scheme), ``host`` and ``selector``.

    Attributes set by the constructor:
      full_url         -- the URL without its fragment
      fragment         -- the fragment (as returned by splittag)
      data             -- request body, or None
      headers          -- regular headers, keys stored via str.capitalize()
      unredirected_hdrs -- headers that are not copied to a redirected request
      origin_req_host  -- given value, or request_host(self) when None
      unverifiable     -- flag stored as given
      method           -- explicit HTTP method, or None
    """

    # NOTE: the shared ``{}`` default for *headers* is only iterated,
    # never mutated, so it is kept as-is for signature compatibility.
    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method.

        An explicit ``method`` wins; otherwise "POST" when data is set
        and "GET" when it is not.
        """
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return full_url with the fragment re-attached, if there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each of these warns with stacklevel=2 so that the DeprecationWarning
    # is attributed to the code calling the deprecated method rather than
    # to the warnings.warn() line inside this class.

    def add_data(self, data):
        """Deprecated: assign to the ``data`` attribute instead."""
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        """Deprecated: test ``data is not None`` instead."""
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        """Deprecated: read the ``data`` attribute instead."""
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        """Deprecated: read the ``type`` attribute instead."""
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        """Deprecated: read the ``host`` attribute instead."""
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        """Deprecated: read the ``selector`` attribute instead."""
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        """Deprecated: read the ``unverifiable`` attribute instead."""
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        """Deprecated: read the ``origin_req_host`` attribute instead."""
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Redirect this request to the proxy at *host*.

        For an https request with no tunnel host recorded yet, the
        original host is remembered in ``_tunnel_host`` and type and
        selector are left alone.  In every other case the type becomes
        *type* and the selector becomes the full URL.  The host is
        replaced by *host* in both cases.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Store a header; the key is normalised with str.capitalize()."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is a key of either header dict.

        The lookup is an exact match; *header_name* is not normalised.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring regular over unredirected.

        The lookup is an exact match; *default* is returned when the
        name is in neither dict.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return a list of (name, value) pairs from both header dicts.

        When a name is present in both, the regular header's value wins.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
376
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are sorted into four lookup tables according to the names
    of their methods:
      handle_open      -- ``<protocol>_open``
      handle_error     -- ``<protocol>_error_<kind>`` (nested per protocol)
      process_request  -- ``<protocol>_request``
      process_response -- ``<protocol>_response``
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every protocol method it defines.

        Raises TypeError if *handler* has no ``add_parent`` attribute.
        The handler is added to ``self.handlers`` and told about its
        parent only if at least one of its methods was registered.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            protocol, sep, condition = meth.partition("_")
            if not sep:
                # A name without an underscore cannot have the
                # <protocol>_<condition> form.  (Slicing with the -1 that
                # str.find() returns would register e.g. a method called
                # "open" under the bogus protocol "ope".)
                continue

            if condition.startswith("error"):
                # "error_404" -> kind "404"; a bare "error" keeps the
                # whole condition as its kind.
                _, has_kind, kind = condition.partition("_")
                if not has_kind:
                    kind = condition
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep the list ordered by the handlers' own comparison
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in ``chain[kind]`` in turn.

        Returns the first result that is not None, or None when every
        handler declined (or none is registered).
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* and return the (post-processed) response.

        *fullurl* may be a URL string, which is wrapped in a Request, or
        a request object, whose ``data`` is overwritten when *data* is
        not None.  The request passes through the ``<protocol>_request``
        processors, then the open handlers, and the response passes
        through the ``<protocol>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains.

        The first truthy result is returned.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http' and 'https' the handlers registered for protocol
        'http' are consulted, keyed by ``args[2]`` (the status code as
        passed by callers in this module), with ``http_error_default``
        as fallback when no handler returns a truthy result.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
514
515# XXX probably also want an abstract factory that knows when it makes
516# sense to skip a superclass in favor of a subclass and when it might
517# make sense to include both
518
def build_opener(*handlers):
    """Create an OpenerDirector populated with default and given handlers.

    A set of default handler classes (HTTPS is included only when
    http.client provides HTTPSConnection) is instantiated and added
    first.  A default class is left out when any argument is an instance
    of it or a subclass of it.  The arguments themselves are added
    afterwards; classes among them are instantiated without arguments.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(candidate, default):
        # True when *candidate* should stand in for *default*.
        if isclass(candidate):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Work out every overridden default before instantiating anything.
    overridden = {klass
                  for klass in default_classes
                  for given in handlers
                  if replaces(given, klass)}

    for klass in default_classes:
        if klass not in overridden:
            opener.add_handler(klass())

    for given in handlers:
        instance = given() if isclass(given) else given
        opener.add_handler(instance)
    return opener
556
class BaseHandler:
    """Base class for handlers.

    ``handler_order`` is the sort key compared by ``__lt__``; lower
    values sort first.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* on the handler as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by ``handler_order``.

        An *other* without a ``handler_order`` attribute always compares
        as greater.
        """
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
574
575
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* for 2xx codes, else defer to the parent.

        Any other status code is passed to ``self.parent.error`` and
        whatever that returns becomes the response.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
592
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the error details."""
        failed_url = req.full_url
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000596
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        Called by the http_error_30x methods when a redirection response
        arrives.  Return a new Request to let the redirect proceed,
        raise HTTPError if nobody else should try to handle this URL, or
        return None if this handler can't but another one might.

        This implementation redirects GET/HEAD on 301, 302, 303 and 307
        and POST on 301, 302 and 303; everything else raises HTTPError.
        """
        method = req.get_method()
        idempotent_ok = (code in (301, 302, 303, 307)
                         and method in ("GET", "HEAD"))
        post_ok = code in (301, 302, 303) and method == "POST"
        if not (idempotent_ok or post_ok):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')

        # The new request carries no body, so drop the body-describing
        # headers and keep the rest.
        body_headers = ("content-length", "content-type")
        kept_headers = {name: value
                        for name, value in req.headers.items()
                        if name.lower() not in body_headers}
        return Request(newurl,
                       headers=kept_headers,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a redirect response and return the new response.

        Returns None when the response names no target or when
        redirect_request() declines.  Raises HTTPError for a disallowed
        target scheme or when the loop limits are exceeded.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ("location", "uri"):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.
        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        already_redirected = hasattr(req, 'redirect_dict')
        if not already_redirected:
            req.redirect_dict = {}
        visited = new.redirect_dict = req.redirect_dict
        if already_redirected and (
                visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
            raise HTTPError(req.full_url, code,
                            self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")
699
700
701def _parse_proxy(proxy):
702 """Return (scheme, user, password, host/port) given a URL or an authority.
703
704 If a URL is supplied, it must have an authority (host:port) component.
705 According to RFC 3986, having an authority component means the URL must
706 have two slashes after the scheme:
707
708 >>> _parse_proxy('file:/ftp.example.com/')
709 Traceback (most recent call last):
710 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
711
712 The first three items of the returned tuple may be None.
713
714 Examples of authority parsing:
715
716 >>> _parse_proxy('proxy.example.com')
717 (None, None, None, 'proxy.example.com')
718 >>> _parse_proxy('proxy.example.com:3128')
719 (None, None, None, 'proxy.example.com:3128')
720
721 The authority component may optionally include userinfo (assumed to be
722 username:password):
723
724 >>> _parse_proxy('joe:password@proxy.example.com')
725 (None, 'joe', 'password', 'proxy.example.com')
726 >>> _parse_proxy('joe:password@proxy.example.com:3128')
727 (None, 'joe', 'password', 'proxy.example.com:3128')
728
729 Same examples, but with URLs instead:
730
731 >>> _parse_proxy('http://proxy.example.com/')
732 ('http', None, None, 'proxy.example.com')
733 >>> _parse_proxy('http://proxy.example.com:3128/')
734 ('http', None, None, 'proxy.example.com:3128')
735 >>> _parse_proxy('http://joe:password@proxy.example.com/')
736 ('http', 'joe', 'password', 'proxy.example.com')
737 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
738 ('http', 'joe', 'password', 'proxy.example.com:3128')
739
740 Everything after the authority is ignored:
741
742 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
743 ('ftp', 'joe', 'password', 'proxy.example.com')
744
745 Test for no trailing '/' case:
746
747 >>> _parse_proxy('http://joe:password@proxy.example.com')
748 ('http', 'joe', 'password', 'proxy.example.com')
749
750 """
Georg Brandl13e89462008-07-01 19:56:00 +0000751 scheme, r_scheme = splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000752 if not r_scheme.startswith("/"):
753 # authority
754 scheme = None
755 authority = proxy
756 else:
757 # URL
758 if not r_scheme.startswith("//"):
759 raise ValueError("proxy URL with no authority: %r" % proxy)
760 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
761 # and 3.3.), path is empty or starts with '/'
762 end = r_scheme.find("/", 2)
763 if end == -1:
764 end = None
765 authority = r_scheme[2:end]
Georg Brandl13e89462008-07-01 19:56:00 +0000766 userinfo, hostport = splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000767 if userinfo is not None:
Georg Brandl13e89462008-07-01 19:56:00 +0000768 user, password = splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000769 else:
770 user = password = None
771 return scheme, user, password, hostport
772
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as a scheme -> URL mapping."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one ``<scheme>_open`` method per entry in *proxies*.

        When *proxies* is None the mapping comes from getproxies().
        Each installed method forwards to proxy_open() with the proxy
        URL and scheme of its entry.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Defaults bind the current entry (and the bound method) at
            # definition time, so each lambda keeps its own proxy.
            forwarder = (
                lambda r, proxy=proxy_url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, forwarder)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*.

        Returns None when the request host is bypassed, or when the
        request was re-targeted and should be opened by the remaining
        handlers.  When the proxy uses a different scheme than the
        (non-https) request, the request is re-opened through the parent
        and that response is returned.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)

        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000814
class HTTPPasswordMgr:
    """In-memory store of (user, password) pairs keyed by realm and URI.

    Credentials registered for a URI apply to that URI and to every URI
    below it in the path hierarchy of the same authority.
    """

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *uri* within *realm*.

        *uri* may be a single URI string or a sequence of them.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        realm_entries = self.passwd.setdefault(realm, {})
        # Each URI is stored twice: once with the scheme's default port
        # filled in and once exactly as given.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            realm_entries[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        Returns (None, None) when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain character-wise common
        # prefix would wrongly treat "/foobar" as being below "/foo".
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
877
878
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the credentials stored under
    the realm ``None`` when the requested realm has no match."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm* first, then in the None realm."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is None:
            # Nothing stored for this realm: try the catch-all realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
887
888
class AbstractBasicAuthHandler:
    """Retry logic shared by the HTTP Basic authentication handlers.

    Concrete handlers define ``auth_header`` (the request header that
    carries the credentials) and call http_error_auth_reqed() from their
    ``http_error_NNN`` method.  ``self.parent`` is used to re-submit the
    request; it is not set in this class.
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored at the start of the header or at a ','.
    # An unanchored, nested "(?:.*,)*" prefix backtracks exponentially on
    # headers made of many commas, and the header is server-controlled.
    rx = re.compile('(?:^|,)'      # start of the header or a ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t]+)'    # scheme, e.g. "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2',  # realm=x, 'x' or "x"
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for lookups; default is an HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer a Basic challenge found in header *authreq* of *headers*.

        Returns the response of the retried request, or None when there
        is no usable Basic challenge or no stored password.  Raises
        HTTPError once more than five attempts were made, and ValueError
        when the challenge's first scheme is not Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if not authreq:
            return None
        tokens = authreq.split()
        if not tokens:
            # whitespace-only header: nothing to answer
            return None
        scheme = tokens[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        # Keep the historical "last challenge with a realm wins" rule.
        mo = None
        for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
            pass
        if mo is None:
            return None
        scheme, quote, realm = mo.groups()
        if quote not in ['"', "'"]:
            warnings.warn("Basic Auth Realm was unquoted",
                          UserWarning, 2)
        if scheme.lower() == 'basic':
            response = self.retry_http_basic_auth(host, req, realm)
            if response and response.code != 401:
                self.retried = 0
            return response
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with the credentials stored for *realm*/*host*.

        Returns None when no password is known or when the very same
        credentials were already sent with the request.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
        if req.headers.get(self.auth_header, None) == auth:
            # Same credentials were rejected already; do not loop.
            return None
        req.add_unredirected_header(self.auth_header, auth)
        return self.parent.open(req, timeout=req.timeout)
956
957
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials for the origin server."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up by its full URL."""
        retried_response = self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
        # Each 401 handled here starts with a fresh attempt counter.
        self.reset_retry_count()
        return retried_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000968
969
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up by its host."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        retried_response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return retried_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000984
985
# Return n random bytes.
# Alias for os.urandom; AbstractDigestAuthHandler.get_cnonce() calls it
# as _randombytes(8) when building a client nonce.
_randombytes = os.urandom
988
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000989
class AbstractDigestAuthHandler:
    """Retry logic shared by the HTTP Digest authentication handlers.

    Concrete handlers define ``auth_header`` and call
    http_error_auth_reqed() from their ``http_error_NNN`` method.
    ``self.parent`` is used to re-submit the request; it is not set in
    this class.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* for lookups; default is an HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer a Digest challenge found in *auth_header* of *headers*.

        Returns the retried response, or None when the challenge is
        absent, is a Basic challenge, or cannot be answered.  Raises
        HTTPError once more than five attempts were made and ValueError
        for schemes other than Digest and Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if not authreq:
            return None
        tokens = authreq.split()
        if not tokens:
            # whitespace-only header: nothing to answer
            return None
        scheme = tokens[0]
        if scheme.lower() == 'digest':
            return self.retry_http_digest_auth(req, authreq)
        elif scheme.lower() != 'basic':
            raise ValueError("AbstractDigestAuthHandler does not support"
                             " the following scheme: '%s'" % scheme)
        return None

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with an answer to the challenge header *auth*.

        Returns None when the header carries no challenge parameters,
        when no answer can be computed, or when the same answer was
        already sent with the request.
        """
        # Split on any whitespace, like the scheme detection does; a
        # header consisting of the scheme alone has nothing to answer.
        pieces = auth.split(None, 1)
        if len(pieces) != 2:
            return None
        challenge = pieces[1]
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if not auth:
            return None
        auth_val = 'Digest %s' % auth
        if req.headers.get(self.auth_header, None) == auth_val:
            return None
        req.add_unredirected_header(self.auth_header, auth_val)
        return self.parent.open(req, timeout=req.timeout)

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials for *req* from challenge *chal*.

        *chal* maps challenge parameter names to values.  Returns the
        header value without the leading "Digest " or None when the
        challenge lacks realm/nonce, names an unsupported algorithm, or
        no user is known.  Raises URLError when the server offers no qop
        value this client supports.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer several qop values ("auth,auth-int");
            # "auth" is the only one implemented here.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce,
                                           'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        Returns (None, None) for algorithms other than 'MD5' and 'SHA',
        which get_authorization() treats as "cannot answer".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
1129
1130
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 response by retrying *req* with Digest credentials."""
        authority = urlparse(req.full_url)[1]
        retried_response = self.http_error_auth_reqed(
            'www-authenticate', authority, req, headers)
        # Each 401 handled here starts with a fresh attempt counter.
        self.reset_retry_count()
        return retried_response
1147
1148
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest credentials for the proxy."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 response by retrying *req* with Digest credentials."""
        retried_response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return retried_response
1160
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and transport shared by the HTTP(S) handlers."""

    def __init__(self, debuglevel=0):
        """Remember *debuglevel*; do_open() applies it to each connection."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Change the debug level used for subsequent connections."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the default headers of *request* and return it.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the parent opener's ``addheaders``; headers
        already present on the request are left alone.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # The ABCs live in collections.abc; the alias in
                    # collections is gone in newer Python versions.
                    try:
                        from collections.abc import Iterable
                    except ImportError:
                        from collections import Iterable
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL, so the Host
            # header has to name the host inside that URL.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Extra keyword arguments are passed to the http_class constructor.
        Raises URLError when the request has no host or when sending the
        request fails with socket.error.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level configured through __init__/set_http_debuglevel.
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Do not leak the connection when sending the request or
            # reading the response fails; the error is passed on unchanged.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # urllib clients expect the response to have the reason phrase in
        # .msg, so it is overwritten here.  It would be good to mark this
        # attribute as deprecated and get them to use info() or .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001269
1270
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http:// URLs built on http.client.HTTPConnection."""

    # Requests are pre-processed by the shared header logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* over a plain HTTP connection; return the response."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1277
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for https:// URLs built on http.client.HTTPSConnection.

        Only defined (and exported through __all__) when http.client
        provides HTTPSConnection.
        """

        # Requests are pre-processed by the shared header logic.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Store the options forwarded to every HTTPSConnection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* over an HTTPS connection; return the response."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001294
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies from a cookie jar to requests and store the cookies
    found in responses back into it."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1312
class UnknownHandler(BaseHandler):
    """Handler that rejects every request it is asked to open."""

    def unknown_open(self, req):
        """Raise URLError naming the scheme of *req*."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001317
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value; one pair of enclosing
    double quotes is removed from a value.  An element without '='
    raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes: an empty value ("key=") must not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1327
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits *s* at commas and returns the whitespace-stripped elements.
    Commas inside a double-quoted string do not split, and a backslash
    inside a quoted string escapes the next character (the backslash
    itself is dropped).  Only double-quotes count, not single-quotes.
    Empty elements between commas are kept; a trailing empty element is
    not.
    """
    items = []
    current = []

    escaped = in_quotes = False
    for ch in s:
        if escaped:
            # Character following a backslash: taken literally.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1370
class FileHandler(BaseHandler):
    """Handler that opens file: URLs referring to the local machine."""

    def file_open(self, req):
        """Open the local file named by *req*.

        When the selector itself starts with '//host' and the request
        host is neither empty nor 'localhost', the host must be one of
        the local addresses from get_names(); otherwise URLError is
        raised.  In that branch the method returns None for a local
        address, leaving the request to other handlers.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison between the host
            # string and the tuple of names could never succeed.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses of this machine.

        The result is computed once and cached on the FileHandler class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file named by the selector of *req*.

        The response headers carry the guessed content type, the file
        size and the modification time.  Raises URLError when the file
        cannot be accessed or the host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # port is only bound when host was non-empty; the "not host"
            # test short-circuits before it is read otherwise.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001422
def _safe_gethostbyname(host):
    """Return socket.gethostbyname(host), or None when the lookup fails
    with socket.gaierror."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        address = None
    return address
1428
class FTPHandler(BaseHandler):
    """Handler that retrieves ftp: URLs through an ftpwrapper."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by *req*.

        Returns an addinfourl for the transfer.  Raises URLError when no
        host is given, when the host cannot be resolved, or when ftplib
        reports an error (keeping the original traceback).
        """
        import ftplib
        import mimetypes

        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        # The last path segment is the file; the rest are directories.
        dirs = [unquote(segment) for segment in path.split('/')]
        filename = dirs.pop()
        if dirs and not dirs[0]:
            # drop the empty segment produced by a leading '/'
            del dirs[0]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(filename, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string(''.join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new, non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001486
class CacheFTPHandler(FTPHandler):
    """FTP handler that keeps connections in a cache and reuses them."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # connection key -> ftpwrapper
        self.timeout = {}    # connection key -> expiry (time.time() based)
        self.soonest = 0     # earliest expiry in self.timeout, 0 if empty
        self.delay = 60      # lifetime added to time.time() on each use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the lifetime given to a connection each time it is used."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which check_cache() evicts an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for the target, creating it first
        when needed, and refresh its expiry time."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close and drop expired connections, then enforce the size rule."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close before dropping, like the expiry path above,
                    # so the evicted connection is not left open.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute self.soonest; an empty cache resets it to 0."""
        # min() of an empty sequence raises ValueError, so guard it.
        self.soonest = min(self.timeout.values()) if self.timeout else 0

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1539
1540
# Code moved from the old urllib module
1542
1543MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
1544
# Path <-> URL helpers: Windows ('nt') uses the nturl2path module, every
# other platform just percent-decodes / percent-encodes the string.
if os.name != 'nt':
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001558
1559# This really consists of two pieces:
1560# (1) a class which handles opening of all sorts of URLs
1561# (plus assorted utilities etc.)
1562# (2) a set of functions for parsing URLs
1563# XXX Should these be separated out into different modules?
1564
1565
1566ftpcache = {}
1567class URLopener:
1568 """Class to open URLs.
1569 This is a class rather than just a subroutine because we may need
1570 more than one set of global protocol-specific options.
1571 Note -- this is a base class for those who don't want the
1572 automatic handling of errors type 302 (relocated) and 401
1573 (authorization needed)."""
1574
1575 __tempfiles = None
1576
1577 version = "Python-urllib/%s" % __version__
1578
1579 # Constructor
def __init__(self, proxies=None, **x509):
    """Set up an opener; emits a DeprecationWarning on every construction.

    proxies -- mapping of URL scheme to proxy URL; when None the result
               of getproxies() is used.
    x509    -- optional 'key_file' and 'cert_file' keyword arguments,
               stored for use by HTTPS connections.
    """
    deprecation = ("%(class)s style of invoking requests is deprecated. "
                   "Use newer urlopen functions/methods"
                   % {'class': self.__class__.__name__})
    warnings.warn(deprecation, DeprecationWarning, stacklevel=3)

    if proxies is None:
        proxies = getproxies()
    assert hasattr(proxies, 'keys'), "proxies must be a mapping"
    self.proxies = proxies

    self.key_file = x509.get('key_file')
    self.cert_file = x509.get('cert_file')
    self.addheaders = [('User-Agent', self.version)]

    self.__tempfiles = []
    # Keep a private reference to os.unlink; cleanup() must not rely on
    # module globals, which may already be gone when it runs.
    self.__unlink = os.unlink

    # Undocumented feature: assign {} to tempcache to cache the files
    # fetched by self.retrieve().  Disabled by default because it does
    # not work for changing documents (expiration headers are not
    # checked).
    self.tempcache = None

    # Undocumented feature: assign a different dict to .ftpcache to get
    # logically independent URL openers.  By default every opener shares
    # the module-level cache.
    # XXX This is not threadsafe.
    self.ftpcache = ftpcache
1605
def __del__(self):
    """Release the opener's resources by delegating to close()."""
    self.close()
1608
def close(self):
    """Close the opener; currently this only runs cleanup()."""
    self.cleanup()
1611
def cleanup(self):
    """Delete the temporary files created by retrieve() and empty the
    temp cache, ignoring files that can no longer be removed."""
    # This code sometimes runs when the rest of this module has already
    # been deleted, so it can't use any globals or import anything;
    # everything it needs is reached through self.
    pending = self.__tempfiles
    if pending:
        remove = self.__unlink
        for path in pending:
            try:
                remove(path)
            except OSError:
                pass
        del pending[:]
    cached = self.tempcache
    if cached:
        cached.clear()
1625
def addheader(self, *args):
    """Add a header to be used by the HTTP interface only
    e.g. u.addheader('Accept', 'sound/basic')

    The positional arguments are stored together as one tuple."""
    self.addheaders.append(args)
1630
1631 # External interface
def open(self, fullurl, data=None):
    """Use URLopener().open(file) instead of open(file, 'r').

    Dispatches to the open_<scheme>() method for the URL's scheme (or
    for the proxy's scheme when a proxy is configured for it), falling
    back to open_unknown() / open_unknown_proxy().  Socket errors raised
    by the handler are re-raised as IOError('socket error', ...).
    """
    fullurl = quote(unwrap(to_bytes(fullurl)), safe="%/:=&?~#+!$,;'@()*[]|")

    # Serve from the retrieve() cache when the document is already there.
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        return addinfourl(open(filename, 'rb'), headers, fullurl)

    urltype, url = splittype(fullurl)
    if not urltype:
        urltype = 'file'

    proxy = None
    if urltype in self.proxies:
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl) # Signal special case to open_*()

    self.type = urltype
    name = ('open_' + urltype).replace('-', '_')
    if not hasattr(self, name):
        if proxy:
            return self.open_unknown_proxy(proxy, fullurl, data)
        return self.open_unknown(fullurl, data)

    try:
        if data is None:
            return getattr(self, name)(url)
        return getattr(self, name)(url, data)
    except HTTPError:
        # HTTPError must propagate untouched rather than be caught by
        # the socket.error clause below.
        raise
    except socket.error as msg:
        raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
1667
def open_unknown(self, fullurl, data=None):
    """Overridable interface to open unknown URL type.

    The default implementation always raises IOError naming the scheme.
    """
    scheme = splittype(fullurl)[0]
    raise IOError('url error', 'unknown url type', scheme)
1672
def open_unknown_proxy(self, proxy, fullurl, data=None):
    """Overridable interface for a proxy whose scheme has no open_*()
    method.

    The default implementation always raises IOError naming the URL's
    scheme and the offending proxy.
    """
    scheme = splittype(fullurl)[0]
    raise IOError('url error', 'invalid proxy for %s' % scheme, proxy)
1677
1678 # External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    reporthook, if given, is called as reporthook(blocknum, blocksize,
    totalsize) once before the transfer and once after every block;
    totalsize is -1 when no Content-Length header is present.  Raises
    ContentTooShortError when fewer bytes arrive than Content-Length
    announced.
    """
    url = unwrap(to_bytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    scheme, rest = splittype(url)
    if filename is None and (not scheme or scheme == 'file'):
        # A local file needs no copy: report its own path.  If it cannot
        # be opened locally, fall through to the generic path below.
        try:
            local = self.open_local_file(rest)
            local_headers = local.info()
            local.close()
            return url2pathname(splithost(rest)[1]), local_headers
        except IOError:
            pass

    source = self.open(url, data)
    try:
        headers = source.info()
        if filename:
            sink = open(filename, 'wb')
        else:
            import tempfile
            # Strip scheme, host, query and attributes so that only the
            # path's extension is left to become the temp file suffix.
            path = splittype(url)[1]
            path = splithost(path or "")[1]
            path = splitquery(path or "")[0]
            path = splitattr(path or "")[0]
            suffix = os.path.splitext(path)[1]
            fd, filename = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            sink = os.fdopen(fd, 'wb')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            if reporthook:
                reporthook(blocknum, bs, size)
            while True:
                block = source.read(bs)
                if not block:
                    break
                read += len(block)
                sink.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            sink.close()
    finally:
        source.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
1742
1743 # Each method named open_<type> knows how to open that type of URL
1744
def _open_generic_http(self, connection_factory, url, data):
    """Make an HTTP connection using connection_factory.

    This is an internal method that should be called from
    open_http() or open_https().

    Arguments:
    - connection_factory should take a host name and return an
      HTTPConnection instance.
    - url is the url to retrieve or a host, relative-path pair
      (the pair form is how open() signals that a proxy is in use).
    - data is payload for a POST request or None.
    """
    user_passwd = proxy_passwd = None
    if not isinstance(url, str):
        # Proxy case: url is (proxy host, full URL of the real target).
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() == 'http':
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Rebuild the selector without the embedded credentials.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost
        else:
            realhost = None
    else:
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host

    if not host:
        raise IOError('http error', 'no host given')

    proxy_auth = None
    if proxy_passwd:
        proxy_passwd = unquote(proxy_passwd)
        proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')

    auth = None
    if user_passwd:
        user_passwd = unquote(user_passwd)
        auth = base64.b64encode(user_passwd.encode()).decode('ascii')

    http_conn = connection_factory(host)

    headers = {}
    if proxy_auth:
        headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
    if auth:
        headers["Authorization"] = "Basic %s" % auth
    if realhost:
        headers["Host"] = realhost

    # Add Connection:close as we don't support persistent connections yet.
    # This helps in closing the socket and avoiding ResourceWarning
    headers["Connection"] = "close"

    # Caller-supplied headers go last so they override the ones above.
    for name, value in self.addheaders:
        headers[name] = value

    if data is None:
        http_conn.request("GET", selector, headers=headers)
    else:
        headers["Content-Type"] = "application/x-www-form-urlencoded"
        http_conn.request("POST", selector, data, headers)

    try:
        response = http_conn.getresponse()
    except http.client.BadStatusLine:
        # something went wrong with the HTTP status line
        raise URLError("http protocol error: bad status line")

    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if not 200 <= response.status < 300:
        return self.http_error(
            url, response.fp,
            response.status, response.reason, response.msg, data)
    return addinfourl(response, response.msg, "http:" + url,
                      response.status)
1835 response.status, response.reason, response.msg, data)
1836
def open_http(self, url, data=None):
    """Use HTTP protocol."""
    factory = http.client.HTTPConnection
    return self._open_generic_http(factory, url, data)
1840
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle http errors.

    Derived class can override this, or provide specific handlers
    named http_error_DDD where DDD is the 3-digit error code.  When no
    specific handler exists, or it returns a false value,
    http_error_default() is used.
    """
    # First check if there's a specific handler for this error
    handler_name = 'http_error_%d' % errcode
    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        args = (url, fp, errcode, errmsg, headers)
        if data is not None:
            args += (data,)
        outcome = handler(*args)
        if outcome:
            return outcome
    return self.http_error_default(url, fp, errcode, errmsg, headers)
1856
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handler: close the connection and raise HTTPError."""
    fp.close()
    error = HTTPError(url, errcode, errmsg, headers, None)
    raise error
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001861
if _have_ssl:
    def _https_connection(self, host):
        """Connection factory for open_https(): an HTTPSConnection that
        uses the opener's key_file and cert_file."""
        client_cert = {'key_file': self.key_file,
                       'cert_file': self.cert_file}
        return http.client.HTTPSConnection(host, **client_cert)

    def open_https(self, url, data=None):
        """Use HTTPS protocol."""
        factory = self._https_connection
        return self._open_generic_http(factory, url, data)
1871
def open_file(self, url):
    """Use local file or FTP depending on form of URL.

    Only URLs without a host part, or with host 'localhost', are
    accepted; any other host raises ValueError.
    """
    if not isinstance(url, str):
        raise URLError('file error: proxy support for file protocol currently not implemented')
    # '//host/...' carries a host part; '///...' has an empty one.
    names_a_host = url[:2] == '//' and url[2:3] != '/'
    if names_a_host and url[2:12].lower() != 'localhost/':
        raise ValueError("file:// scheme is supported only on localhost")
    return self.open_local_file(url)
1880
def open_local_file(self, url):
    """Use local file.

    Returns the file opened in binary mode, wrapped together with
    synthesized Content-Type, Content-Length and Last-modified headers.
    Raises URLError when the file cannot be stat'ed or the URL's host is
    not this machine.
    """
    import email.utils
    import mimetypes
    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        raise URLError(e.strerror, e.filename)
    modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = email.message_from_string(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (mtype or 'text/plain', stats.st_size, modified))
    is_absolute = file[:1] == '/'

    if not host:
        urlfile = 'file://' + file if is_absolute else file
        return addinfourl(open(localname, 'rb'), headers, urlfile)

    # A host was given: accept it only if it has no port and resolves
    # to one of this machine's own addresses.
    host, port = splitport(host)
    is_this_machine = (not port
                       and socket.gethostbyname(host) in ((localhost(),) + thishost()))
    if not is_this_machine:
        raise URLError('local file error: not on local host')
    if is_absolute:
        urlfile = 'file://' + file
    elif file[:2] == './':
        raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
    else:
        urlfile = file
    return addinfourl(open(localname, 'rb'), headers, urlfile)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001912
def open_ftp(self, url):
    """Use FTP protocol.

    Connections are kept in self.ftpcache, keyed by (user, host, port,
    directory path); when the cache grows beyond MAXFTPCACHE entries all
    connections other than the one needed now are closed and dropped.
    Returns the transfer's file object wrapped with Content-Type /
    Content-Length headers when they are known.  FTP errors are
    re-raised as URLError.
    """
    if not isinstance(url, str):
        raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
    import mimetypes
    host, path = splithost(url)
    if not host: raise URLError('ftp error: no host given')
    host, port = splitport(host)
    user, host = splituser(host)
    if user: user, passwd = splitpasswd(user)
    else: passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')
    host = socket.gethostbyname(host)
    if not port:
        import ftplib
        port = ftplib.FTP_PORT
    else:
        port = int(port)
    path, attrs = splitattr(path)
    path = unquote(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    # Drop the empty component produced by the leading '/'; a second
    # empty component (path started with '//') means the root directory.
    if dirs and not dirs[0]: dirs = dirs[1:]
    if dirs and not dirs[0]: dirs[0] = '/'
    key = user, host, port, '/'.join(dirs)
    # XXX thread unsafe!
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily.  Iterate over a snapshot
        # of the keys: deleting entries while iterating the dict itself
        # raises RuntimeError.
        for stale_key in list(self.ftpcache):
            if stale_key != key:
                stale = self.ftpcache[stale_key]
                del self.ftpcache[stale_key]
                stale.close()
    try:
        if key not in self.ftpcache:
            self.ftpcache[key] = \
                ftpwrapper(user, passwd, host, port, dirs)
        # Default transfer type: directory listing when no file name is
        # given, binary otherwise; a ';type=' attribute overrides it.
        if not file: type = 'D'
        else: type = 'I'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()
        (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
        mtype = mimetypes.guess_type("ftp:" + url)[0]
        headers = ""
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        headers = email.message_from_string(headers)
        return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors() as exp:
        raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001970
def open_data(self, url, data=None):
    """Use "data" URL.

    The data argument (POSTed data) is ignored.  The returned object's
    headers carry Date, Content-type and Content-Length.
    """
    if not isinstance(url, str):
        raise URLError('data error: proxy support for data protocol currently not implemented')
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    try:
        mediatype, payload = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'

    # A trailing ';token' without '=' is the encoding, not a parameter.
    encoding = ''
    last_semi = mediatype.rfind(';')
    if last_semi >= 0 and '=' not in mediatype[last_semi:]:
        encoding = mediatype[last_semi+1:]
        mediatype = mediatype[:last_semi]

    date_line = 'Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                        time.gmtime(time.time()))
    if encoding == 'base64':
        # XXX is this encoding/decoding ok?
        payload = base64.decodebytes(payload.encode('ascii')).decode('latin-1')
    else:
        payload = unquote(payload)

    msg = '\n'.join([
        date_line,
        'Content-type: %s' % mediatype,
        'Content-Length: %d' % len(payload),
        '',
        payload,
    ])
    headers = email.message_from_string(msg)
    # NOTE(review): the stream is built from the whole message, so
    # reading it yields the header lines followed by the payload.
    f = io.StringIO(msg)
    #f.fileno = None     # needed for addinfourl
    return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002011
2012
2013class FancyURLopener(URLopener):
2014 """Derived class with handlers for errors we can handle (perhaps)."""
2015
def __init__(self, *args, **kwargs):
    """Initialise the base opener, then the retry bookkeeping."""
    URLopener.__init__(self, *args, **kwargs)
    # Credentials remembered by get_user_passwd(), keyed by 'realm@host'.
    self.auth_cache = {}
    # Redirect counter used by http_error_302(), and its limit.
    self.tries, self.maxtries = 0, 10
2021
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling -- don't raise an exception.

    The error response is handed back as an ordinary response object
    carrying the error code.
    """
    full_url = "http:" + url
    return addinfourl(fp, headers, full_url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002025
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    Follows the redirect unless maxtries redirects have already been
    followed in a row, in which case the response is reported as a 500
    "Redirect Recursion" error instead.
    """
    self.tries += 1
    if self.maxtries and self.tries >= self.maxtries:
        meth = (self.http_error_500 if hasattr(self, "http_error_500")
                else self.http_error_default)
        self.tries = 0
        return meth(url, fp, 500,
                    "Internal Server Error: Redirect Recursion", headers)
    outcome = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                     data)
    # The chain ended without hitting the limit: start counting afresh.
    self.tries = 0
    return outcome
2041
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Follow the redirect named by the 'location' (or 'uri') header.

    Returns None when neither header is present.  Raises HTTPError when
    the target's scheme is not http, https, ftp or empty.  The data
    argument is not forwarded to the new request.
    """
    for field in ('location', 'uri'):
        if field in headers:
            newurl = headers[field]
            break
    else:
        return
    fp.close()

    # In case the server sent a relative URL, join with original:
    newurl = urljoin(self.type + ":" + url, newurl)

    # For security reasons, we don't allow redirection to anything other
    # than http, https and ftp.

    # We are using newer HTTPError with older redirect_internal method
    # This older method will get deprecated in 3.3
    scheme = urlparse(newurl).scheme
    if scheme not in ('http', 'https', 'ftp', ''):
        raise HTTPError(newurl, errcode,
                        errmsg +
                        " Redirection to url '%s' is not allowed." % newurl,
                        headers, fp)

    return self.open(newurl)
2069
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- also relocated (permanently).

    Handled exactly like a 302."""
    handle = self.http_error_302
    return handle(url, fp, errcode, errmsg, headers, data)
2073
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- also relocated (essentially identical to 302).

    Handled exactly like a 302."""
    handle = self.http_error_302
    return handle(url, fp, errcode, errmsg, headers, data)
2077
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 307 -- relocated, but turn POST into error.

    A request without data is redirected like a 302; a request that
    carries data is not re-sent and goes to the default error handler.
    """
    if data is not None:
        return self.http_error_default(url, fp, errcode, errmsg, headers)
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2084
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
        retry=False):
    """Error 401 -- authentication required.
    This function supports Basic authentication only.

    Unless retry is true and the server offers a Basic challenge with a
    realm, the error is passed to URLopener.http_error_default();
    otherwise the request is retried through
    retry_<scheme>_basic_auth().
    """
    def give_up():
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)

    if 'www-authenticate' not in headers:
        give_up()
    challenge = headers['www-authenticate']
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not match:
        give_up()
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        give_up()
    if not retry:
        give_up()
    retry_method = getattr(self, 'retry_' + self.type + '_basic_auth')
    if data is None:
        return retry_method(url, realm)
    return retry_method(url, realm, data)
2109
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
        retry=False):
    """Error 407 -- proxy authentication required.
    This function supports Basic authentication only.

    Unless retry is true and the proxy offers a Basic challenge with a
    realm, the error is passed to URLopener.http_error_default();
    otherwise the request is retried through
    retry_proxy_<scheme>_basic_auth().
    """
    def give_up():
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)

    if 'proxy-authenticate' not in headers:
        give_up()
    challenge = headers['proxy-authenticate']
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not match:
        give_up()
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        give_up()
    if not retry:
        give_up()
    retry_method = getattr(self, 'retry_proxy_' + self.type + '_basic_auth')
    if data is None:
        return retry_method(url, realm)
    return retry_method(url, realm, data)
2134
def retry_proxy_http_basic_auth(self, url, realm, data=None):
    """Retry an http request with user/password written into the 'http'
    proxy URL; returns None when no credentials are obtained."""
    host, selector = splithost(url)
    newurl = 'http://' + host + selector
    proxyhost, proxyselector = splithost(splittype(self.proxies['http'])[1])
    # Strip any credentials already present.  A non-zero offset also
    # tells get_user_passwd() to discard its cached entry.
    after_at = proxyhost.find('@') + 1
    proxyhost = proxyhost[after_at:]
    user, passwd = self.get_user_passwd(proxyhost, realm, after_at)
    if not (user or passwd):
        return None
    proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                              quote(passwd, safe=''), proxyhost)
    self.proxies['http'] = 'http://' + proxyhost + proxyselector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2152
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    """Retry an https request with user/password written into the
    'https' proxy URL; returns None when no credentials are obtained."""
    host, selector = splithost(url)
    newurl = 'https://' + host + selector
    proxyhost, proxyselector = splithost(splittype(self.proxies['https'])[1])
    # Strip any credentials already present.  A non-zero offset also
    # tells get_user_passwd() to discard its cached entry.
    after_at = proxyhost.find('@') + 1
    proxyhost = proxyhost[after_at:]
    user, passwd = self.get_user_passwd(proxyhost, realm, after_at)
    if not (user or passwd):
        return None
    proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                              quote(passwd, safe=''), proxyhost)
    self.proxies['https'] = 'https://' + proxyhost + proxyselector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2170
def retry_http_basic_auth(self, url, realm, data=None):
    """Retry an http request with user/password embedded in the URL;
    returns None when no credentials are obtained."""
    host, selector = splithost(url)
    # Strip any credentials already present.  A non-zero offset also
    # tells get_user_passwd() to discard its cached entry.
    after_at = host.find('@') + 1
    host = host[after_at:]
    user, passwd = self.get_user_passwd(host, realm, after_at)
    if not (user or passwd):
        return None
    credentials = (quote(user, safe=''), quote(passwd, safe=''))
    newurl = 'http://' + "%s:%s@%s" % (credentials + (host,)) + selector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2184
def retry_https_basic_auth(self, url, realm, data=None):
    """Retry an https request with user/password embedded in the URL;
    returns None when no credentials are obtained."""
    host, selector = splithost(url)
    # Strip any credentials already present.  A non-zero offset also
    # tells get_user_passwd() to discard its cached entry.
    after_at = host.find('@') + 1
    host = host[after_at:]
    user, passwd = self.get_user_passwd(host, realm, after_at)
    if not (user or passwd):
        return None
    credentials = (quote(user, safe=''), quote(passwd, safe=''))
    newurl = 'https://' + "%s:%s@%s" % (credentials + (host,)) + selector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2198
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return (user, passwd) for realm at host.

    Cached credentials are returned unless clear_cache is true, in
    which case the cached entry is discarded and the user is prompted
    again.  A prompted answer is cached only if it is not empty.
    """
    key = realm + '@' + host.lower()
    if key in self.auth_cache:
        if not clear_cache:
            return self.auth_cache[key]
        del self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[key] = (user, passwd)
    return user, passwd
2209
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!

    Asks for user name and password on the terminal; returns
    (None, None) if the user interrupts either prompt.
    """
    import getpass
    try:
        user = input("Enter username for %s at %s: " % (realm, host))
        passwd = getpass.getpass("Enter password for %s in %s at %s: " %
            (user, realm, host))
    except KeyboardInterrupt:
        # Finish the interrupted prompt line before giving up.
        print()
        return None, None
    else:
        return user, passwd
2221
2222
2223# Utility functions
2224
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the answer reused afterwards."""
    global _localhost
    address = _localhost
    if address is None:
        address = _localhost = socket.gethostbyname('localhost')
    return address
2232
_thishost = None
def thishost():
    """Return the IP addresses of the current host, as a tuple.

    If this machine's own host name cannot be resolved, the addresses
    of 'localhost' are used.  The result is computed once and reused."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2243
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on the first call; its all_errors value is
    then cached in the module-level _ftperrors variable.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
2252
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The object is built once from an empty string and the same instance
    is handed out on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2260
2261
2262# Utility classes
2263
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    The wrapper owns one ftplib.FTP control connection.  Every file object
    handed out by retrfile() bumps `refcount`; the control connection is
    really closed only when no such file object is left open and the
    wrapper is no longer marked `keepalive`.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and open the FTP session.

        *dirs* is a sequence of path components joined with '/' and used
        as the initial working directory.  *persistent* sets the initial
        value of `keepalive`.  Whatever init() raises is propagated.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # connect/login/cwd failed: nobody will ever get a reference to
            # this half-built wrapper, so release the control socket here
            # instead of leaking it, then let the error propagate.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start a transfer and return (file_object, retrieval_length).

        *type* is the FTP transfer type letter; 'd' or 'D' requests a
        directory listing.  When *file* cannot be retrieved as a file
        because the server answers 550, a LIST of it is tried instead.
        Other permanent FTP errors are re-raised as URLError.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed on the existing session: open a fresh one
            # and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply is swallowed so that the directory listing
                # below gets a chance; anything else is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always go back to where we were.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the wrapper as no longer busy with a transfer."""
        self.busy = 0

    def close(self):
        """Drop the keepalive flag; really close if no file is still open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook of a file object returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP control connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2353
2354# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but a variable whose
    '_proxy' suffix is spelled in lower case takes precedence over other
    spellings, and setting such a variable to the empty string removes
    the scheme from the result.  When REQUEST_METHOD is set (i.e. we are
    running as a CGI script) HTTP_PROXY is ignored; only a lower-case
    http_proxy is honoured in that situation.
    """
    environ = os.environ
    proxies = {}
    # First pass: accept any spelling of <scheme>_proxy.
    for name, value in environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): under CGI the web server exports the
    # client's "Proxy:" request header as HTTP_PROXY, so that variable must
    # not be trusted.  A lower-case http_proxy is re-added by the pass below.
    if 'REQUEST_METHOD' in environ:
        proxies.pop('http', None)
    # Second pass: lower-case variables win regardless of the order in
    # which the environment happens to be iterated.
    for name, value in environ.items():
        if name[-6:] == '_proxy':
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2370
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    *host* may carry a ':port' suffix.  Matching is case-insensitive and
    respects label boundaries: the entry 'example.com' (a leading dot is
    ignored) matches 'example.com' and 'www.example.com' but not
    'evil-example.com'.  Returns 1 if the proxy should be bypassed,
    0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host is, or is a sub-domain of, any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        if hostonly == name or host == name:
            return 1
        # Require a dot in front of the suffix so that only whole DNS
        # labels can match.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
2390
2391
Ronald Oussorene72e1612011-03-14 18:15:25 -04002392# This code tests an OSX specific data structure but is testable on all
2393# platforms
2394def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2395 """
2396 Return True iff this host shouldn't be accessed using a proxy
2397
2398 This function uses the MacOSX framework SystemConfiguration
2399 to fetch the proxy information.
2400
2401 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2402 { 'exclude_simple': bool,
2403 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2404 }
2405 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002406 from fnmatch import fnmatch
2407
2408 hostonly, port = splitport(host)
2409
2410 def ip2num(ipAddr):
2411 parts = ipAddr.split('.')
2412 parts = list(map(int, parts))
2413 if len(parts) != 4:
2414 parts = (parts + [0, 0, 0, 0])[:4]
2415 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2416
2417 # Check for simple host names:
2418 if '.' not in host:
2419 if proxy_settings['exclude_simple']:
2420 return True
2421
2422 hostIP = None
2423
2424 for value in proxy_settings.get('exceptions', ()):
2425 # Items in the list are strings like these: *.local, 169.254/16
2426 if not value: continue
2427
2428 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2429 if m is not None:
2430 if hostIP is None:
2431 try:
2432 hostIP = socket.gethostbyname(hostonly)
2433 hostIP = ip2num(hostIP)
2434 except socket.error:
2435 continue
2436
2437 base = ip2num(m.group(1))
2438 mask = m.group(2)
2439 if mask is None:
2440 mask = 8 * (m.group(1).count('.') + 1)
2441 else:
2442 mask = int(mask[1:])
2443 mask = 32 - mask
2444
2445 if (hostIP >> mask) == (base >> mask):
2446 return True
2447
2448 elif fnmatch(host, value):
2449 return True
2450
2451 return False
2452
2453
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002454if sys.platform == 'darwin':
Ronald Oussoren84151202010-04-18 20:46:11 +00002455 from _scproxy import _get_proxy_settings, _get_proxies
2456
2457 def proxy_bypass_macosx_sysconf(host):
Ronald Oussoren84151202010-04-18 20:46:11 +00002458 proxy_settings = _get_proxy_settings()
Ronald Oussorene72e1612011-03-14 18:15:25 -04002459 return _proxy_bypass_macosx_sysconf(host, proxy_settings)
Ronald Oussoren84151202010-04-18 20:46:11 +00002460
2461 def getproxies_macosx_sysconf():
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002462 """Return a dictionary of scheme -> proxy server URL mappings.
2463
Ronald Oussoren84151202010-04-18 20:46:11 +00002464 This function uses the MacOSX framework SystemConfiguration
2465 to fetch the proxy information.
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002466 """
Ronald Oussoren84151202010-04-18 20:46:11 +00002467 return _get_proxies()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002468
Ronald Oussoren84151202010-04-18 20:46:11 +00002469
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002470
2471 def proxy_bypass(host):
2472 if getproxies_environment():
2473 return proxy_bypass_environment(host)
2474 else:
Ronald Oussoren84151202010-04-18 20:46:11 +00002475 return proxy_bypass_macosx_sysconf(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002476
2477 def getproxies():
Ronald Oussoren84151202010-04-18 20:46:11 +00002478 return getproxies_environment() or getproxies_macosx_sysconf()
2479
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002480
2481elif os.name == 'nt':
2482 def getproxies_registry():
2483 """Return a dictionary of scheme -> proxy server URL mappings.
2484
2485 Win32 uses the registry to store proxies.
2486
2487 """
2488 proxies = {}
2489 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002490 import winreg
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002491 except ImportError:
2492 # Std module, so should be around - but you never know!
2493 return proxies
2494 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002495 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002496 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002497 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002498 'ProxyEnable')[0]
2499 if proxyEnable:
2500 # Returned as Unicode but problems if not converted to ASCII
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002501 proxyServer = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002502 'ProxyServer')[0])
2503 if '=' in proxyServer:
2504 # Per-protocol settings
2505 for p in proxyServer.split(';'):
2506 protocol, address = p.split('=', 1)
2507 # See if address has a type:// prefix
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002508 if not re.match('^([^/:]+)://', address):
2509 address = '%s://%s' % (protocol, address)
2510 proxies[protocol] = address
2511 else:
2512 # Use one setting for all protocols
2513 if proxyServer[:5] == 'http:':
2514 proxies['http'] = proxyServer
2515 else:
2516 proxies['http'] = 'http://%s' % proxyServer
Senthil Kumaran04f31b82010-07-14 20:10:52 +00002517 proxies['https'] = 'https://%s' % proxyServer
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002518 proxies['ftp'] = 'ftp://%s' % proxyServer
2519 internetSettings.Close()
2520 except (WindowsError, ValueError, TypeError):
2521 # Either registry key not found etc, or the value in an
2522 # unexpected format.
2523 # proxies already set up to be empty so nothing to do
2524 pass
2525 return proxies
2526
2527 def getproxies():
2528 """Return a dictionary of scheme -> proxy server URL mappings.
2529
2530 Returns settings gathered from the environment, if specified,
2531 or the registry.
2532
2533 """
2534 return getproxies_environment() or getproxies_registry()
2535
2536 def proxy_bypass_registry(host):
2537 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002538 import winreg
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002539 except ImportError:
2540 # Std modules, so should be around - but you never know!
2541 return 0
2542 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002543 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002544 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002545 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002546 'ProxyEnable')[0]
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002547 proxyOverride = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002548 'ProxyOverride')[0])
2549 # ^^^^ Returned as Unicode but problems if not converted to ASCII
2550 except WindowsError:
2551 return 0
2552 if not proxyEnable or not proxyOverride:
2553 return 0
2554 # try to make a host list from name and IP address.
Georg Brandl13e89462008-07-01 19:56:00 +00002555 rawHost, port = splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002556 host = [rawHost]
2557 try:
2558 addr = socket.gethostbyname(rawHost)
2559 if addr != rawHost:
2560 host.append(addr)
2561 except socket.error:
2562 pass
2563 try:
2564 fqdn = socket.getfqdn(rawHost)
2565 if fqdn != rawHost:
2566 host.append(fqdn)
2567 except socket.error:
2568 pass
2569 # make a check value list from the registry entry: replace the
2570 # '<local>' string by the localhost entry and the corresponding
2571 # canonical entry.
2572 proxyOverride = proxyOverride.split(';')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002573 # now check if we match one of the registry values.
2574 for test in proxyOverride:
Senthil Kumaran49476062009-05-01 06:00:23 +00002575 if test == '<local>':
2576 if '.' not in rawHost:
2577 return 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002578 test = test.replace(".", r"\.") # mask dots
2579 test = test.replace("*", r".*") # change glob sequence
2580 test = test.replace("?", r".") # change glob char
2581 for val in host:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002582 if re.match(test, val, re.I):
2583 return 1
2584 return 0
2585
2586 def proxy_bypass(host):
2587 """Return a dictionary of scheme -> proxy server URL mappings.
2588
2589 Returns settings gathered from the environment, if specified,
2590 or the registry.
2591
2592 """
2593 if getproxies_environment():
2594 return proxy_bypass_environment(host)
2595 else:
2596 return proxy_bypass_registry(host)
2597
2598else:
2599 # By default use environment variables
2600 getproxies = getproxies_environment
2601 proxy_bypass = proxy_bypass_environment