blob: a5f0866cb6b58d850b1201dee771d45e2f57c713 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
18urllib. pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algo that
# was requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b2012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
109# check for SSL
110try:
111 import ssl
Senthil Kumaranc2958622010-11-22 04:48:26 +0000112except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 _have_ssl = False
114else:
115 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800117__all__ = [
118 # Classes
119 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
120 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
121 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
122 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
123 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
124 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
125 'UnknownHandler', 'HTTPErrorProcessor',
126 # Functions
127 'urlopen', 'install_opener', 'build_opener',
128 'pathname2url', 'url2pathname', 'getproxies',
129 # Legacy interface
130 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
131]
132
# Used in the User-Agent header sent with every request.  The string is
# built from sys.version_info rather than by slicing sys.version, because
# the slice truncates two-digit minor versions ("3.10.4"[:3] == "3.1").
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener; created lazily by urlopen() or set
# explicitly through install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None):
    """Open *url* and return whatever the selected opener's open() returns.

    url, data and timeout are passed through unchanged to the opener's
    open() method.

    When cafile or capath is given, a one-off opener is built around an
    HTTPSHandler whose SSL context requires a certificate verified
    against those CA locations, with host name checking enabled; the
    module-wide opener is neither used nor modified in that case.
    Otherwise the module-wide opener is used, and it is created with
    build_opener() on first use.

    Raises ValueError if cafile or capath is given but the ssl module
    could not be imported.
    """
    global _opener
    if cafile or capath:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        # This branch is only entered with CA locations supplied, so
        # certificate verification and host name checking are always on.
        context.verify_mode = ssl.CERT_REQUIRED
        context.load_verify_locations(cafile, capath)
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000158
def install_opener(opener):
    """Make *opener* the module-wide default opener used by urlopen()."""
    globals()['_opener'] = opener
162
# Paths of temporary files created by urlretrieve(); urlcleanup() removes
# them.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a block size, and the
    total file size of the URL target (-1 when the response carries no
    Content-Length header). It is called once before the first read and
    once after every block written, always with the same block size, so
    that progress can be computed as blocknum * blocksize. The data
    argument should be valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.

    Raises ContentTooShortError when fewer bytes were received than the
    Content-Length header announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            # Remember the file so that urlcleanup() can delete it later.
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Always report the block size (not 0, and not the length of
            # the block just read): hooks derive progress from
            # blocknum * blocksize.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000226
def urlcleanup():
    """Delete urlretrieve()'s temporary files and drop the default opener."""
    global _opener

    for path in _url_tempfiles:
        try:
            os.unlink(path)
        except EnvironmentError:
            # Best effort: the file may already be gone or be undeletable.
            continue

    # Empty the registry in place so other references see the change.
    _url_tempfiles[:] = []
    if _opener:
        _opener = None
238
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is the network location of the request's full URL; when the
    URL has none, the request's "Host" header is used instead.  A
    trailing ":port" is removed.

    Variation from RFC: the returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url).netloc
    if host == "":
        host = request.get_header("Host", "")

    # Strip the port, if present, then normalise the case.
    return _cut_port_re.sub("", host, count=1).lower()
256
class Request:
    """Encapsulate the state of a single URL request.

    Attributes set by the constructor:

    full_url -- the URL with any '<URL:...>' wrapper and '#fragment'
        removed
    fragment -- the fragment split off the URL, or None
    type, host, selector -- scheme, unquoted host part and remainder of
        the URL, as computed by _parse()
    data -- the request body; a non-None value makes get_method()
        default to "POST"
    headers -- regular headers, keyed by their capitalize()d name
    unredirected_hdrs -- headers added with add_unredirected_header()
    origin_req_host, unverifiable, method -- stored as passed in;
        origin_req_host defaults to request_host(self)
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        # The shared default dict is only iterated, never mutated, so
        # using a mutable default argument is harmless here.
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached if there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each of these is a thin wrapper around the attribute of the same
    # name.  stacklevel=2 attributes the DeprecationWarning to the code
    # that called the deprecated method instead of to this module.

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Redirect this request to the proxy *host*.

        An https request that has no tunnel host yet keeps its type and
        selector and records the original host in _tunnel_host.  Any
        other request switches to the proxy's *type* and uses the full
        URL as its selector.  In both cases host becomes the proxy host.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector is the full URL (see set_proxy())."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Store header *key* (capitalize()d) with value *val*."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that is kept separate from the regular headers."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is stored in either header dict.

        The name is looked up as given, so it must already be in the
        capitalize()d form used by add_header().
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the value of *header_name*, preferring regular headers.

        Falls back to the unredirected headers and then to *default*.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
377
class OpenerDirector:
    """Route requests and responses through a set of registered handlers.

    Handlers are indexed by the names of their methods (see
    add_handler()); open() runs the request processors, the opening
    handlers and the response processors for the request's protocol.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every method name that follows the
        <protocol>_open, <protocol>_request, <protocol>_response or
        <protocol>_error_<kind> conventions.

        Raises TypeError if handler has no add_parent attribute.  A
        handler that matches none of the conventions is ignored.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        # Tables for the conditions whose registration key is simply
        # the protocol.
        by_condition = {
            "open": self.handle_open,
            "response": self.process_response,
            "request": self.process_request,
        }

        registered = False
        for name in dir(handler):
            if name in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            cut = name.find("_")
            protocol, condition = name[:cut], name[cut + 1:]

            if condition.startswith("error"):
                # The kind is whatever follows "<protocol>_error_";
                # numeric kinds (HTTP status codes) are stored as ints.
                kind = name[cut + condition.find("_") + 2:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                table = self.handle_error.setdefault(protocol, {})
            elif condition in by_condition:
                kind = protocol
                table = by_condition[condition]
            else:
                continue

            chain = table.setdefault(kind, [])
            if chain:
                bisect.insort(chain, handler)
            else:
                chain.append(handler)
            registered = True

        if registered:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler registered for *kind* in
        *chain* and return the first result that is not None.
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the
        response after all response processors have run.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        # The protocol is read once, before any processor can swap the
        # request object.
        protocol = req.type

        # pre-process request
        request_meth = protocol + "_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_meth)(req)

        response = self._open(req, data)

        # post-process response
        response_meth = protocol + "_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_meth)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' opening
        handlers in that order; return the first truthy result, or the
        result of the 'unknown' chain.
        """
        opened = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if not opened:
            protocol = req.type
            opened = self._call_chain(self.handle_open, protocol,
                                      protocol + '_open', req)
        if not opened:
            opened = self._call_chain(self.handle_open, 'unknown',
                                      'unknown_open', req)
        return opened

    def error(self, proto, *args):
        """Dispatch an error for *proto* to the registered error handlers.

        For http/https, args[2] is used as the error kind and, when no
        specific handler returns a truthy result, the
        'http_error_default' chain is tried as a fallback.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
        else:
            table = self.handle_error
            meth_name = proto + '_error'

        outcome = self._call_chain(table, proto, meth_name, *args)
        if outcome:
            return outcome

        if is_http:
            return self._call_chain(table, 'default', 'http_error_default',
                                    *args)
515
516# XXX probably also want an abstract factory that knows when it makes
517# sense to skip a superclass in favor of a subclass and when it might
518# make sense to include both
519
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.  A default handler class is left
    out when any argument is an instance or a subclass of it.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(candidate, default):
        # True when *candidate* stands in for the *default* handler class.
        if isclass(candidate):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    overridden = {default
                  for default in default_classes
                  for candidate in handlers
                  if replaces(candidate, default)}

    # Defaults that were not overridden go in first, in their usual order.
    for default in default_classes:
        if default not in overridden:
            opener.add_handler(default())

    for candidate in handlers:
        opener.add_handler(candidate() if isclass(candidate) else candidate)
    return opener
556 return opener
557
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector.

    handler_order is the sort key used by __lt__; lower values sort
    first.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as the object this handler is registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
575
576
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes; otherwise return
        whatever the parent's error() dispatch produces.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
593
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler that turns the error into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError for the request's full URL."""
        url = req.full_url
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000597
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        safe_method = (code in (301, 302, 303, 307)
                       and method in ("GET", "HEAD"))
        redirected_post = code in (301, 302, 303) and method == "POST"
        if not (safe_method or redirected_post):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no body, so headers describing the
        # old body are dropped.
        CONTENT_HEADERS = ("content-length", "content-type")
        kept_headers = {name: value
                        for name, value in req.headers.items()
                        if name.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=kept_headers,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the target URL.

        Returns None when the response names no target or when
        redirect_request() declines; raises HTTPError for a disallowed
        target scheme or when a redirect loop is suspected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ("location", "uri"):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.
        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            pieces = list(urlparts)
            pieces[2] = "/"
            urlparts = pieces
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if not hasattr(req, 'redirect_dict'):
            req.redirect_dict = {}
            visited = new.redirect_dict = req.redirect_dict
        else:
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
700
701
702def _parse_proxy(proxy):
703 """Return (scheme, user, password, host/port) given a URL or an authority.
704
705 If a URL is supplied, it must have an authority (host:port) component.
706 According to RFC 3986, having an authority component means the URL must
707 have two slashes after the scheme:
708
709 >>> _parse_proxy('file:/ftp.example.com/')
710 Traceback (most recent call last):
711 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
712
713 The first three items of the returned tuple may be None.
714
715 Examples of authority parsing:
716
717 >>> _parse_proxy('proxy.example.com')
718 (None, None, None, 'proxy.example.com')
719 >>> _parse_proxy('proxy.example.com:3128')
720 (None, None, None, 'proxy.example.com:3128')
721
722 The authority component may optionally include userinfo (assumed to be
723 username:password):
724
725 >>> _parse_proxy('joe:password@proxy.example.com')
726 (None, 'joe', 'password', 'proxy.example.com')
727 >>> _parse_proxy('joe:password@proxy.example.com:3128')
728 (None, 'joe', 'password', 'proxy.example.com:3128')
729
730 Same examples, but with URLs instead:
731
732 >>> _parse_proxy('http://proxy.example.com/')
733 ('http', None, None, 'proxy.example.com')
734 >>> _parse_proxy('http://proxy.example.com:3128/')
735 ('http', None, None, 'proxy.example.com:3128')
736 >>> _parse_proxy('http://joe:password@proxy.example.com/')
737 ('http', 'joe', 'password', 'proxy.example.com')
738 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
739 ('http', 'joe', 'password', 'proxy.example.com:3128')
740
741 Everything after the authority is ignored:
742
743 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
744 ('ftp', 'joe', 'password', 'proxy.example.com')
745
746 Test for no trailing '/' case:
747
748 >>> _parse_proxy('http://joe:password@proxy.example.com')
749 ('http', 'joe', 'password', 'proxy.example.com')
750
751 """
Georg Brandl13e89462008-07-01 19:56:00 +0000752 scheme, r_scheme = splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000753 if not r_scheme.startswith("/"):
754 # authority
755 scheme = None
756 authority = proxy
757 else:
758 # URL
759 if not r_scheme.startswith("//"):
760 raise ValueError("proxy URL with no authority: %r" % proxy)
761 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
762 # and 3.3.), path is empty or starts with '/'
763 end = r_scheme.find("/", 2)
764 if end == -1:
765 end = None
766 authority = r_scheme[2:end]
Georg Brandl13e89462008-07-01 19:56:00 +0000767 userinfo, hostport = splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000768 if userinfo is not None:
Georg Brandl13e89462008-07-01 19:56:00 +0000769 user, password = splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000770 else:
771 user = password = None
772 return scheme, user, password, hostport
773
class ProxyHandler(BaseHandler):
    """Send requests through the proxy configured for their URL scheme.

    For every scheme in the proxies mapping the instance gets a
    <scheme>_open method that forwards to proxy_open().
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Use the *proxies* mapping (scheme -> proxy URL), or the result
        of getproxies() when it is None.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Defaults bind the current loop values to each function.
            def scheme_open(r, proxy=proxy_url, type=scheme,
                            meth=self.proxy_open):
                return meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, scheme_open)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*.

        Returns None when the host is bypassed or when another handler
        can take over the modified request; otherwise re-opens the
        request through the parent and returns that result.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type in (proxy_type, 'https'):
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000815
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix.

    ``self.passwd`` maps a realm to a dict whose keys are tuples of reduced
    URIs (see reduce_uri) and whose values are ``(user, passwd)`` tuples.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* under one URI or a sequence of URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        domains = self.passwd.setdefault(realm, {})
        # Store both the form with the scheme's default port filled in and
        # the form without it, so lookups match either spelling.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            domains[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the ``(user, passwd)`` registered for *authuri* in *realm*.

        Returns ``(None, None)`` when no stored URI is equal to, or a parent
        of, *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. ``(authority, path)``
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain character-wise prefix test
        # would treat "/foobar" as living below "/foo" and hand out the
        # credentials registered for "/foo" to an unrelated resource.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
878
879
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` realm.

    Credentials registered under realm ``None`` are used whenever the
    requested realm has no user for the URI.
    """

    def find_user_password(self, realm, authuri):
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is None:
            # Nothing for this specific realm: consult the default realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
888
889
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Concrete subclasses provide ``auth_header`` (the request header that
    carries the credentials) and a ``parent`` opener used to retry.
    """

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or on a comma.
    # The earlier greedy "(?:.*,)*" prefix backtracked exponentially on
    # headers containing many commas.
    rx = re.compile('(?:^|,)'        # start of the string or ','
                    '[ \t]*'         # optional whitespace
                    '([^ \t,]+)'     # scheme like "Basic"
                    '[ \t]+'         # mandatory whitespace
                    'realm=(["\'])(.*?)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Forget previous failed attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials if the challenge allows it.

        *authreq* is the name of the challenge header to read from
        *headers*.  Returns the response of the retried request, or None
        when no retry was attempted.  Raises HTTPError after more than
        five attempts and ValueError when the header's first scheme is
        not Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if not authreq:
            return None
        tokens = authreq.split()
        if not tokens:
            # Header consisted of whitespace only: nothing to answer.
            return None
        scheme = tokens[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        # Answer the first Basic challenge that names a realm; challenges
        # for other schemes found in the same header are skipped.
        for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
            scheme, quote, realm = mo.groups()
            if scheme.lower() == 'basic':
                response = self.retry_http_basic_auth(host, req, realm)
                if response and response.code != 401:
                    self.retried = 0
                return response
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with credentials for *realm*, if any are known.

        Returns None when no password is stored or when the very same
        credentials were already sent with the request.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
        if req.headers.get(self.auth_header, None) == auth:
            # Same credentials already failed; do not loop.
            return None
        req.add_unredirected_header(self.auth_header, auth)
        return self.parent.open(req, timeout=req.timeout)
954
955
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Credentials are looked up against the full request URL.
        retried = self.http_error_auth_reqed('www-authenticate',
                                             req.full_url, req, headers)
        self.reset_retry_count()
        return retried
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000966
967
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Basic proxy credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        retried = self.http_error_auth_reqed('proxy-authenticate',
                                             req.host, req, headers)
        self.reset_retry_count()
        return retried
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000982
983
# Return n random bytes.
# Module-level alias of os.urandom; AbstractDigestAuthHandler.get_cnonce()
# calls it to mix random bytes into the client nonce.
_randombytes = os.urandom
986
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000987
class AbstractDigestAuthHandler:
    """Shared logic for answering Digest authentication challenges.

    Concrete subclasses provide ``auth_header`` (the request header that
    carries the credentials) and a ``parent`` opener used to retry.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget previous failed attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* with Digest credentials if the challenge is Digest.

        Returns the retried response, or None when nothing was retried
        (no challenge, or a Basic challenge left for another handler).
        Raises HTTPError after more than five attempts and ValueError for
        a scheme that is neither Digest nor Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if not authreq:
            return None
        tokens = authreq.split()
        if not tokens:
            # Header consisted of whitespace only: nothing to answer.
            return None
        scheme = tokens[0]
        if scheme.lower() == 'digest':
            return self.retry_http_digest_auth(req, authreq)
        elif scheme.lower() != 'basic':
            raise ValueError("AbstractDigestAuthHandler does not support"
                             " the following scheme: '%s'" % scheme)
        return None

    def retry_http_digest_auth(self, req, auth):
        """Parse the challenge in *auth* and re-issue *req* with an answer.

        Returns None when no authorization could be built or when the same
        authorization value was already sent with the request.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce for the server *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string answering challenge *chal*.

        *chal* is a dict of challenge parameters.  Returns None when the
        challenge lacks a realm or nonce, names an unsupported algorithm,
        or no user is known for the realm.  Raises URLError when the
        server offers a qop but not ``auth``.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer a comma-separated list such as
            # "auth,auth-int"; we always answer with plain "auth".
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the ``(H, KD)`` callables for *algorithm*.

        Returns ``(None, None)`` for an algorithm other than 'MD5' or
        'SHA', which get_authorization() treats as "cannot answer".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1127
1128
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    handler_order = 490  # before Basic auth
    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Only the network-location part of the URL is passed as the host.
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
1145
1146
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 407 responses by retrying with Digest proxy credentials."""

    handler_order = 490  # before Basic auth
    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1158
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-like handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in default headers on *request* and return it.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the parent opener's default headers, each only
        when the request does not already have that header.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = ("POST data should be bytes or an iterable of bytes. "
                       "It cannot be str")
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # The ABCs live in collections.abc; the aliases in the
                    # collections namespace itself are gone in newer
                    # Python versions.
                    import collections.abc
                    if isinstance(data, collections.abc.Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is a full URL; the Host header
            # must name the host found in it, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Extra keyword arguments are passed to the http_class constructor.
        Raises URLError when the request has no host or when sending it
        fails with a socket error.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)

        # Unredirected headers take precedence over regular ones.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Close the connection on any failure, including one raised
            # while reading the response, so the socket is not leaked.
            h.close()
            raise

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001261
1262
class HTTPHandler(AbstractHTTPHandler):
    """Open ``http`` requests over http.client.HTTPConnection."""

    # Outgoing requests are prepared by the shared do_request_ logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1269
# HTTPS support is only defined (and exported) when http.client provides
# HTTPSConnection.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open ``https`` requests over http.client.HTTPSConnection."""

        # Outgoing requests are prepared by the shared do_request_ logic.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._check_hostname = check_hostname
            self._context = context

        def https_open(self, req):
            # Both settings are forwarded to the connection constructor.
            conn_args = dict(context=self._context,
                             check_hostname=self._check_hostname)
            return self.do_open(http.client.HTTPSConnection, req,
                                **conn_args)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001286
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001287class HTTPCookieProcessor(BaseHandler):
1288 def __init__(self, cookiejar=None):
1289 import http.cookiejar
1290 if cookiejar is None:
1291 cookiejar = http.cookiejar.CookieJar()
1292 self.cookiejar = cookiejar
1293
1294 def http_request(self, request):
1295 self.cookiejar.add_cookie_header(request)
1296 return request
1297
1298 def http_response(self, request, response):
1299 self.cookiejar.extract_cookies(response, request)
1300 return response
1301
1302 https_request = http_request
1303 https_response = http_response
1304
class UnknownHandler(BaseHandler):
    """Reject requests whose URL scheme is not handled."""

    def unknown_open(self, req):
        # Always fails: report the request's scheme in a URLError.
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001309
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    surrounding double quotes removed from the value.  An element without
    '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexing so an empty value ("key=") is kept
        # as '' rather than raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1319
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits *s* on commas, except for commas inside double-quoted strings,
    and returns the elements with surrounding whitespace removed.  Inside
    a quoted string a backslash is dropped and the character after it is
    kept literally, so an escaped quote does not end the string.  Only
    double-quotes count, not single-quotes.
    """
    items = []
    current = []
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character following a backslash: keep it verbatim.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    tail = ''.join(current)
    if tail:
        items.append(tail)

    return [item.strip() for item in items]
1362
class FileHandler(BaseHandler):
    """Open ``file`` URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a local file for *req*.

        A URL naming a host other than 'localhost' is accepted only when
        that host is one of get_names(); otherwise URLError is raised.
        For such an accepted host this method returns None, which leaves
        the request to other handlers.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison against the tuple
            # of names could never succeed and rejected every host.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses that count as the local host.

        The result is computed once and cached on the FileHandler class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The response headers carry a guessed Content-type (defaulting to
        text/plain), Content-length and Last-modified.  Raises URLError
        when the file cannot be accessed or the host is not local.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # 'port' is only read when 'host' is truthy, i.e. after the
            # split above has assigned it.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001414
1415def _safe_gethostbyname(host):
1416 try:
1417 return socket.gethostbyname(host)
1418 except socket.gaierror:
1419 return None
1420
class FTPHandler(BaseHandler):
    """Open ``ftp`` URLs, using a fresh connection per request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl whose headers carry a guessed Content-type
        and, when the length is known, Content-length.  Raises URLError
        when no host is given, the host cannot be resolved, or the FTP
        exchange fails.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        # The last path segment is the file; everything before it is the
        # directory chain, minus the empty segment of a leading '/'.
        dirs = [unquote(segment) for segment in path.split('/')]
        file = dirs.pop()
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise,
            # unless a ";type=" attribute overrides it.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as msg:
            exc = URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001478
class CacheFTPHandler(FTPHandler):
    """FTP handler that reuses connections for a limited time.

    ``cache`` maps a connection key to its ftpwrapper and ``timeout`` maps
    the same key to the time at which the entry expires.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how many seconds a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry gets evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached ftpwrapper for this target, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        # Every use pushes the expiry time forward.
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Drop expired entries, then evict one entry if the cache is full."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            for key, expiry in list(self.timeout.items()):
                if expiry < now:
                    self.cache[key].close()
                    del self.cache[key]
                    del self.timeout[key]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            for key, expiry in list(self.timeout.items()):
                if expiry == self.soonest:
                    # Evict the entry that would expire first.
                    del self.cache[key]
                    del self.timeout[key]
                    break
            self.soonest = min(self.timeout.values())

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1531
1532
# Code moved from the old urllib module
1534
# NOTE(review): the code that enforces this limit is not in this part of
# the file -- presumably the FTP path of URLopener; confirm.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
1536
# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        # Outside Windows the conversion is plain percent-decoding.
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        # Outside Windows the conversion is plain percent-encoding.
        return quote(pathname)
else:
    # Windows paths are converted by the dedicated nturl2path module.
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001550
1551# This really consists of two pieces:
1552# (1) a class which handles opening of all sorts of URLs
1553# (plus assorted utilities etc.)
1554# (2) a set of functions for parsing URLs
1555# XXX Should these be separated out into different modules?
1556
1557
ftpcache = {}
class URLopener:
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed).

    Each URL scheme is served by a method named open_<scheme>; open()
    dispatches to it by name."""

    # Paths of temporary files created by retrieve(); removed by cleanup().
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and default headers.

        proxies -- mapping of scheme to proxy URL; when None the result
                   of getproxies() is used.
        x509    -- optional 'key_file' and 'cert_file' keyword arguments,
                   passed on to HTTPS connections.

        Emits a DeprecationWarning on every instantiation.
        """
        # The two literals are joined at compile time, so the separating
        # space has to be part of the first one.
        msg = ("%(class)s style of invoking requests is deprecated. "
               "Use newer urlopen functions/methods"
               % {'class': self.__class__.__name__})
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release resources held by the opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files made by retrieve() and empty tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the open_<scheme> method matching the URL (or its
        configured proxy); a URL without a scheme is treated as 'file'.
        Schemes without a handler go to open_unknown() or
        open_unknown_proxy().  socket.error raised by a handler is
        re-raised as IOError('socket error', ...); HTTPError passes
        through unchanged.
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            # Must be listed before socket.error so it is not rewrapped.
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, when given, is called as
        reporthook(blocknum, blocksize, totalsize) once before the first
        read and once after every block written; totalsize is -1 when no
        Content-Length header is present.

        Raises ContentTooShortError when fewer bytes than announced by
        Content-Length were received.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            # A local file needs no copy: hand back its own path.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a local file; fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Keep the URL's file extension on the temporary file.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair
          (the pair form is what open() passes when a proxy is used).
        - data is payload for a POST request or None.

        Returns an addinfourl object for 2xx responses; any other status
        is handed to http_error().
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Strip the credentials from the request line.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Caller-supplied headers win over the defaults set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.
        A specific handler that returns a false value falls back to
        http_error_default()."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Build an HTTPS connection using the configured x509 files."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl object whose headers carry Content-Type,
        Content-Length and Last-modified derived from the file's stat().
        Raises URLError when the file cannot be stat'ed or the URL names
        a host other than this machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # URLError is called with (reason, filename) everywhere else in
            # this class; the former three-argument call did not match.
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # localhost() returns a single address string and thishost() a
        # tuple of addresses, so the string is wrapped before joining.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache keyed by
        (user, host, port, directory path) and reused across calls.
        """
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a snapshot
            # of the keys: deleting from a dict while iterating over it
            # raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file name
            # is given, binary ("image") otherwise.
            if not file: transfer_type = 'D'
            else: transfer_type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, transfer_type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        Any POSTed data is ignored; the payload comes from the URL itself.
        """
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without "=" is the encoding marker, not a
        # media type parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002003
2004
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect handling (301, 302, 303, 307) and Basic
    authentication retries (401, 407) on top of URLopener."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # "realm@host" -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless self.maxtries redirects have already
        been followed, in which case the response is reported as a 500
        "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset on every exit path.  Without this, an exception raised
            # while following the redirect left the counter elevated, so
            # later requests on the same opener hit the limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the Location (or URI) header.

        Returns None when neither header is present.  Raises HTTPError
        when the target uses a scheme other than http, https or ftp.
        The redirected request is issued without the original data.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only.

        Unless retry is true the error is raised as HTTPError instead of
        prompting for credentials."""
        # URLopener.http_error_default always raises HTTPError, so each of
        # the guards below ends the method.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only.

        Unless retry is true the error is raised as HTTPError instead of
        prompting for credentials."""
        # URLopener.http_error_default always raises HTTPError, so each of
        # the guards below ends the method.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an HTTP request with credentials embedded in the proxy URL.

        Rewrites self.proxies['http'] to carry the user and password.
        Returns None when no credentials were obtained."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # those evidently failed, so the cached entry is discarded.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an HTTPS request with credentials embedded in the proxy URL.

        Rewrites self.proxies['https'] to carry the user and password.
        Returns None when no credentials were obtained."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an HTTP request with credentials embedded in the URL.

        Returns None when no credentials were obtained."""
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; the
        # cached entry for them is then discarded by get_user_passwd().
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an HTTPS request with credentials embedded in the URL.

        Returns None when no credentials were obtained."""
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Results are cached under "realm@host" (host lower-cased).  A true
        clear_cache drops any cached entry and prompts again.  Empty
        answers are not cached."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for the credentials on the terminal; returns (None, None)
        when the user interrupts the prompt."""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2213
2214
2215# Utility functions
2216
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once; later calls return the cached value."""
    global _localhost
    address = _localhost
    if address is None:
        address = _localhost = socket.gethostbyname('localhost')
    return address
2224
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The result is a tuple of address strings, computed once and cached.
    When the machine's own host name cannot be resolved, the addresses
    of 'localhost' are used instead of letting the lookup error escape.
    """
    global _thishost
    if _thishost is None:
        try:
            _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
        except socket.gaierror:
            # The configured host name has no resolvable address (common
            # on machines without a matching hosts/DNS entry).
            _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    return _thishost
2232
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value is cached afterwards."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2241
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same object is returned on every call."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2249
2250
2251# Utility classes
2252
2253class ftpwrapper:
2254 """Class used by open_ftp() for cache of open FTP connections."""
2255
def __init__(self, user, passwd, host, port, dirs, timeout=None,
             persistent=True):
    """Remember the connection parameters, then connect via init().

    dirs is the sequence of directories init() changes into after login;
    persistent is stored as the keepalive flag.
    """
    # Login credentials.
    self.user, self.passwd = user, passwd
    # Server address and connect timeout.
    self.host, self.port = host, port
    self.dirs = dirs
    self.timeout = timeout
    # Bookkeeping: reference counter (starts at zero) and keepalive flag.
    self.refcount = 0
    self.keepalive = persistent
    self.init()
2267
def init(self):
    """Open a fresh FTP session: connect, log in, change directory."""
    import ftplib
    self.busy = 0
    # Publish the client on self before connecting, so self.ftp is set
    # even when connect() fails.
    client = ftplib.FTP()
    self.ftp = client
    client.connect(self.host, self.port, self.timeout)
    client.login(self.user, self.passwd)
    # Walk down the directory path one component at a time.
    for directory in self.dirs:
        client.cwd(directory)
2276
def retrfile(self, file, type):
    """Start retrieving a file or a directory listing.

    type is an FTP transfer type letter; 'd'/'D' requests a directory
    listing.  When the file cannot be retrieved because of a 550 reply,
    a listing of that name is attempted instead.

    Returns (fileobj, length) where fileobj wraps the data connection
    with self.file_close registered as its close hook, and length is the
    size reported by ntransfercmd().
    """
    import ftplib
    self.endtransfer()
    isdir = type in ('d', 'D')
    cmd = 'TYPE A' if isdir else 'TYPE ' + type
    try:
        self.ftp.voidcmd(cmd)
    except ftplib.all_errors:
        # The control connection is unusable: reconnect and try once more.
        self.init()
        self.ftp.voidcmd(cmd)
    conn = None
    if file and not isdir:
        # Try to retrieve as a file
        try:
            conn, retrlen = self.ftp.ntransfercmd('RETR ' + file)
        except ftplib.error_perm as reason:
            # A 550 reply falls through to the listing attempt below;
            # every other permanent error is reported.
            if str(reason)[:3] != '550':
                raise URLError('ftp error', reason).with_traceback(
                    sys.exc_info()[2])
    if not conn:
        # Set transfer mode to ASCII!
        self.ftp.voidcmd('TYPE A')
        # Try a directory listing. Verify that directory exists.
        if not file:
            listing_cmd = 'LIST'
        else:
            pwd = self.ftp.pwd()
            try:
                try:
                    self.ftp.cwd(file)
                except ftplib.error_perm as reason:
                    raise URLError('ftp error', reason) from reason
            finally:
                # Always return to the directory we started from.
                self.ftp.cwd(pwd)
            listing_cmd = 'LIST ' + file
        conn, retrlen = self.ftp.ntransfercmd(listing_cmd)
    self.busy = 1

    data_file = conn.makefile('rb')
    ftpobj = addclosehook(data_file, self.file_close)
    self.refcount += 1
    # The file object made above is what the caller reads from.
    conn.close()
    # Pass back both a suitably decorated object and a retrieval length
    return (ftpobj, retrlen)
2321
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002322 def endtransfer(self):
2323 if not self.busy:
2324 return
2325 self.busy = 0
2326 try:
2327 self.ftp.voidresp()
2328 except ftperrors():
2329 pass
2330
2331 def close(self):
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02002332 self.keepalive = False
2333 if self.refcount <= 0:
2334 self.real_close()
2335
2336 def file_close(self):
2337 self.endtransfer()
2338 self.refcount -= 1
2339 if self.refcount <= 0 and not self.keepalive:
2340 self.real_close()
2341
2342 def real_close(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002343 self.endtransfer()
2344 try:
2345 self.ftp.close()
2346 except ftperrors():
2347 pass
2348
2349# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    capitalisation); this seems to be the standard convention.  If you
    need a different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    When the same scheme is given in several capitalisations, the
    all-lowercase variable wins; an empty lowercase variable disables the
    proxy for that scheme.  If REQUEST_METHOD is set (i.e. we appear to
    be running as a CGI script), an 'http' entry that did not come from
    the lowercase http_proxy variable is dropped.
    """
    proxies = {}
    # First pass: accept any capitalisation.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110: a CGI server exports the client's "Proxy:" request
    # header as HTTP_PROXY, so that value is attacker controlled.  Forget
    # it; a genuine lowercase http_proxy is restored by the second pass.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase variables override, so the result does not
    # depend on the iteration order of the environment.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2365
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (NO_PROXY is
    used when no_proxy is unset or empty), which should be a list of DNS
    suffixes separated by commas, or '*' for all hosts.

    *host* may carry a ':port' suffix.  An entry matches when it equals
    the host (with or without the port) or is a parent domain of it;
    leading dots on entries are ignored and the comparison is
    case-insensitive.  Returns 1 to bypass the proxy, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly = splitport(host)[0]
    # DNS names are case-insensitive.
    host = host.lower()
    hostonly = hostonly.lower()
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        if hostonly == name or host == name:
            return 1
        # Require a label boundary so that "example.com" does not match
        # "evilexample.com".
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
2385
2386
# This code tests an OSX specific data structure but is testable on all
# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }

    *host* may carry a ':port' suffix.  Exception entries that start with
    a dotted number are treated as an IPv4 network (optionally with a
    '/prefixlen') and compared against the resolved address of the host;
    all other entries are matched against *host* as fnmatch patterns.
    Network entries whose prefix length is outside 0..32 are ignored.
    """
    from fnmatch import fnmatch

    hostonly = splitport(host)[0]

    def ip2num(ipAddr):
        # Pack up to four dotted decimal fields into one integer; missing
        # trailing fields count as 0 and extra fields are dropped.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, at most once per successful lookup.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix: 8 bits per dotted field given.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])

            if mask < 0 or mask > 32:
                # An invalid prefix length would turn into a negative
                # shift count below (ValueError); skip the entry instead.
                continue

            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
2447
2448
# Select the platform-specific implementations of getproxies() and
# proxy_bypass(): OS X SystemConfiguration, the Windows registry, or the
# environment only.  Environment variables, when any proxy is configured
# there, take precedence on every platform.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Test if *host* should bypass the proxy per the OS X settings.

        Fetches the current settings with _get_proxy_settings() and
        delegates to _proxy_bypass_macosx_sysconf().
        """
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Test if *host* should be accessed without a proxy.

        Uses the environment rules when any proxy is configured in the
        environment, otherwise the OS X SystemConfiguration settings.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the OS X SystemConfiguration framework.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty dictionary is returned when winreg is unavailable, the
        proxy is disabled, or the registry values cannot be read or
        parsed.
        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with-statement closes the key even when a query or the
            # parsing below raises.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test if *host* matches the registry's ProxyOverride list.

        Returns 1 when the proxy should be bypassed, 0 otherwise
        (including when winreg or the registry values are unavailable or
        the proxy is disabled).  *host* may carry a ':port' suffix.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            # The with-statement makes sure the key handle is closed.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry; the special
        # '<local>' entry matches any host name without a dot.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if *host* should be accessed without a proxy.

        Uses the environment rules when any proxy is configured in the
        environment, otherwise the registry's ProxyOverride list.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment