"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algo that
# was requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
109# check for SSL
110try:
111 import ssl
Senthil Kumaranc2958622010-11-22 04:48:26 +0000112except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 _have_ssl = False
114else:
115 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800117__all__ = [
118 # Classes
119 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
120 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
121 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
122 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
123 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
124 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
125 'UnknownHandler', 'HTTPErrorProcessor',
126 # Functions
127 'urlopen', 'install_opener', 'build_opener',
128 'pathname2url', 'url2pathname', 'getproxies',
129 # Legacy interface
130 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
131]
132
# Used in the User-Agent header sent with requests.  Built from
# sys.version_info rather than by slicing sys.version: sys.version[:3]
# truncates two-digit minor versions (e.g. "3.10.4" would yield "3.1").
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener: created lazily by urlopen() and replaced by
# install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the selected opener's open() returns.

    url, data and timeout are passed straight through to the opener's
    open() method.

    When none of cafile, capath or cadefault is truthy, the module-level
    opener is used; it is created with build_opener() on first use and
    cached in the global ``_opener``.

    When any of them is truthy, a one-off opener is built around an
    HTTPSHandler whose SSL context requires certificate verification.
    Certificates are loaded from cafile/capath if either is given, and
    from the default verify paths otherwise.  The cached module-level
    opener is neither used nor modified in that case.

    Raises ValueError if certificate options are given but the ssl
    module could not be imported.
    """
    global _opener

    if not (cafile or capath or cadefault):
        # Plain request: reuse (or lazily create) the shared opener.
        if _opener is None:
            _opener = build_opener()
        opener = _opener
        return opener.open(url, data, timeout)

    if not _have_ssl:
        raise ValueError('SSL support not available')

    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.options |= ssl.OP_NO_SSLv2
    context.verify_mode = ssl.CERT_REQUIRED
    if cafile or capath:
        context.load_verify_locations(cafile, capath)
    else:
        context.set_default_verify_paths()

    https_handler = HTTPSHandler(context=context, check_hostname=True)
    opener = build_opener(https_handler)
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000157
def install_opener(opener):
    """Make *opener* the module-level default opener used by urlopen().

    The object is stored as-is in the global ``_opener``; no validation
    is performed.
    """
    global _opener
    _opener = opener
161
# Paths of temporary files created by urlretrieve(); urlcleanup() removes
# them.
_url_tempfiles = []


def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a location on disk.

    url is opened with urlopen(url, data).  If filename is given, the
    content is written there; otherwise a named temporary file is created
    (and remembered so that urlcleanup() can delete it later).

    reporthook, if given, is called as reporthook(blocknum, blocksize,
    totalsize): once before the first read and once after every block
    written.  totalsize is the Content-Length header value, or -1 when
    the header is absent.

    For a "file" URL with no filename, nothing is copied: the normalized
    local path and the headers are returned directly.

    Returns a (filename, headers) tuple.  Raises ContentTooShortError
    (carrying that tuple) when fewer bytes were read than Content-Length
    announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Local file and no destination requested: hand back the path
        # itself instead of making a copy.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file: caller-supplied, or a tracked temp file.
        if filename:
            out = open(filename, 'wb')
        else:
            out = tempfile.NamedTemporaryFile(delete=False)
            filename = out.name
            _url_tempfiles.append(filename)

        with out:
            result = filename, headers
            block_size = 1024*8
            received = 0
            blocks_done = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])
            else:
                expected = -1

            if reporthook:
                reporthook(blocks_done, block_size, expected)

            block = fp.read(block_size)
            while block:
                received += len(block)
                out.write(block)
                blocks_done += 1
                if reporthook:
                    reporthook(blocks_done, block_size, expected)
                block = fp.read(block_size)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000225
def urlcleanup():
    """Delete temp files recorded by urlretrieve() and drop the default opener.

    Files that cannot be removed are silently ignored.  The list of
    recorded paths is emptied in place, and the module-level opener, if
    one is set, is reset to None.
    """
    global _opener

    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # Best effort: the file may already be gone.
            pass
    _url_tempfiles[:] = []

    if _opener:
        _opener = None
237
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is the network-location part of request.full_url; when that
    is empty, the request's "Host" header is used instead (defaulting to
    the empty string).  A trailing ":<digits>" port is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url).netloc
    if host == "":
        host = request.get_header("Host", "")

    # Drop a trailing port number, if present, then normalize case.
    without_port = _cut_port_re.sub("", host, 1)
    return without_port.lower()
255
class Request:
    """Encapsulates the state of a single URL request.

    Attributes set by the constructor:

    full_url          the URL with any '<URL:...>' wrapper and '#fragment'
                      removed
    fragment          the fragment split off the URL (or None)
    type, host,       scheme, unquoted host[:port] and the remainder of
    selector          the URL, as produced by splittype()/splithost()
    data              request body, or None
    headers           headers that follow redirects; keys are stored via
                      str.capitalize()
    unredirected_hdrs headers that are not copied to a redirected request
    origin_req_host   defaults to request_host(self)
    unverifiable      stored as given
    method            explicit HTTP method, or None to derive it from data

    Note that has_header() and get_header() look names up verbatim, so
    callers must pass them in the same capitalized form under which
    add_header() stored them (e.g. "Content-type").
    """

    # The mutable default for *headers* is only ever iterated, never
    # mutated, so sharing it between calls is harmless.
    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached if there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each of these warns with stacklevel=2 so that the DeprecationWarning
    # is attributed to the caller's line rather than to this module.

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Redirect this request through the proxy at *host*.

        For an https request that has no tunnel host yet, the original
        host is remembered in _tunnel_host and type/selector are left
        alone.  Otherwise the request type becomes *type* and the
        selector becomes the full URL.  In both cases host is replaced
        by the proxy host.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector is the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set in either header dict."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers win over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        On a name clash the regular header overrides the unredirected one.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
376
class OpenerDirector:
    """Dispatches requests to registered handlers.

    Handlers are indexed by the names of their methods:

    <protocol>_open         -> handle_open[protocol]
    <protocol>_request      -> process_request[protocol]
    <protocol>_response     -> process_response[protocol]
    <protocol>_error_<kind> -> handle_error[protocol][kind]

    Each index maps to a list of handlers kept in sorted order.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every hook its method names match.

        Raises TypeError if the object has no add_parent attribute.  A
        handler that matches no hook at all is not registered and its
        add_parent() is not called.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            if i == -1:
                # No "<protocol>_" prefix.  Without this guard the slicing
                # below would treat e.g. a method called "open" as protocol
                # "ope" with condition "open" and register a bogus entry.
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # Everything after "<protocol>_error_" is the error kind;
                # numeric kinds (HTTP status codes) are stored as ints.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request passes through the <protocol>_request processors, is
        opened via _open(), and the response then passes through the
        <protocol>_response processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try default_open, then <protocol>_open, then unknown_open."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http' and 'https' the handlers registered for protocol
        'http' are used, keyed by args[2] (the status code as passed by
        the caller); if none of them returns a truthy result, the
        'default' http handlers are tried with the original arguments.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # https is not different than http
            lookup = self.handle_error['http']
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
514
515# XXX probably also want an abstract factory that knows when it makes
516# sense to skip a superclass in favor of a subclass and when it might
517# make sense to include both
518
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.

    The opener also gets the default handlers (HTTPSHandler among them
    only when http.client provides HTTPSConnection).  A default handler
    class is left out when any argument is an instance of it or a
    subclass of it.  Defaults are added first, then the arguments in the
    order given.
    """
    def is_class(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(candidate, klass):
        # True when the user-supplied candidate stands in for klass.
        if is_class(candidate):
            return issubclass(candidate, klass)
        return isinstance(candidate, klass)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Work out which defaults the caller has overridden.
    overridden = set()
    for klass in default_classes:
        for candidate in handlers:
            if replaces(candidate, klass):
                overridden.add(klass)
    for klass in overridden:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for item in handlers:
        opener.add_handler(item() if is_class(item) else item)
    return opener
556
class BaseHandler:
    """Base class for handlers.

    handler_order is the value handlers are compared by when they are
    sorted; add_parent() records the object passed to it as self.parent.
    """
    handler_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
574
575
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* for 2xx codes; otherwise route it to parent.error().

        For any other status the result of
        self.parent.error('http', request, response, code, msg, hdrs)
        is returned in place of the response.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
592
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # Build the exception from the request URL and the error details
        # passed in.
        error = HTTPError(req.full_url, code, msg, hdrs, fp)
        raise error
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000596
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.

        The new Request carries the original headers except
        Content-Length and Content-Type, carries no data, and is marked
        unverifiable.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError when the target
        scheme is not http, https, ftp or empty (relative), or when the
        redirect limits (max_repeats / max_redirections) are reached.
        Otherwise returns the result of opening the new request through
        the parent opener.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # Only supply the missing "/" path for URLs that name a host.  A
        # relative reference with an empty path (e.g. "?page=2") must keep
        # it empty so that urljoin() resolves it against the current URL
        # instead of the server root.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
699
700
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL form: it must carry an authority introduced by "//".
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'; the authority is
        # whatever sits between the "//" and the next "/".
        authority = remainder[2:].split("/", 1)[0]
    else:
        # Bare authority: whatever splittype() took for a scheme was not one.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
772
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as a scheme -> URL mapping.

    For every entry in the mapping an instance attribute named
    '<scheme>_open' is installed that forwards to proxy_open() with the
    proxy URL and scheme bound.
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Use *proxies*, or the result of getproxies() when it is None."""
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Bind the loop values as lambda defaults so each hook keeps
            # its own proxy URL and scheme.
            hook = (lambda r, proxy=url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, hook)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, unless the request host bypasses proxies.

        Returns None to let the remaining handlers process the (possibly
        rewritten) request, or the result of re-opening the request
        through the parent when the proxy's scheme differs from the
        request's original, non-https scheme.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)

        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000814
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm under one URI or a sequence of URIs.

        Each URI is stored twice, once with and once without the scheme's
        default port, so that lookups match either spelling.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        domains = self.passwd.setdefault(realm, {})
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            domains[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) registered for authuri in realm.

        Returns (None, None) when no stored URI covers authuri.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A character-wise common prefix
        # would treat "/foobar" as lying below "/foo" and hand the
        # credentials to an unrelated sibling path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
877
878
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm registered as None."""

    def find_user_password(self, realm, authuri):
        """Look up authuri in realm first, then in the default (None) realm."""
        credentials = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        if credentials[0] is not None:
            return credentials
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
887
888
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Subclasses supply ``auth_header`` (the request header carrying the
    credentials) and ``parent`` (the opener used to retry the request).
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use password_mgr for lookups, or a new HTTPPasswordMgr if None."""
        self.passwd = (HTTPPasswordMgr() if password_mgr is None
                       else password_mgr)
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in the header named by authreq.

        Returns the retried response, or None when there is no usable
        challenge.  Raises HTTPError once the retry counter exceeds 5 and
        ValueError when the challenge is not for the Basic scheme.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        challenge = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        self.retried += 1

        if not challenge:
            return None

        scheme = challenge.split()[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        match = AbstractBasicAuthHandler.rx.search(challenge)
        if not match:
            return None

        scheme, quote, realm = match.groups()
        if quote not in ['"', "'"]:
            warnings.warn("Basic Auth Realm was unquoted",
                          UserWarning, 2)
        if scheme.lower() != 'basic':
            return None

        response = self.retry_http_basic_auth(host, req, realm)
        if response and response.code != 401:
            self.retried = 0
        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send req with Basic credentials for realm, if any are known.

        Returns None when no password is stored or when the request
        already carries exactly these credentials.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        token = base64.b64encode(("%s:%s" % (user, pw)).encode())
        auth = "Basic " + token.decode("ascii")
        if req.headers.get(self.auth_header, None) == auth:
            return None
        req.add_unredirected_header(self.auth_header, auth)
        return self.parent.open(req, timeout=req.timeout)
956
957
958class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
959
960 auth_header = 'Authorization'
961
962 def http_error_401(self, req, fp, code, msg, headers):
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000963 url = req.full_url
Senthil Kumaran67a62a42010-08-19 17:50:31 +0000964 response = self.http_error_auth_reqed('www-authenticate',
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965 url, req, headers)
Senthil Kumaran67a62a42010-08-19 17:50:31 +0000966 self.reset_retry_count()
967 return response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000968
969
970class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
971
972 auth_header = 'Proxy-authorization'
973
974 def http_error_407(self, req, fp, code, msg, headers):
975 # http_error_auth_reqed requires that there is no userinfo component in
Georg Brandl029986a2008-06-23 11:44:14 +0000976 # authority. Assume there isn't one, since urllib.request does not (and
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000977 # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
978 # userinfo.
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000979 authority = req.host
Senthil Kumaran67a62a42010-08-19 17:50:31 +0000980 response = self.http_error_auth_reqed('proxy-authenticate',
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000981 authority, req, headers)
Senthil Kumaran67a62a42010-08-19 17:50:31 +0000982 self.reset_retry_count()
983 return response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000984
985
# _randombytes(n) returns n random bytes; it is a plain alias of os.urandom.
_randombytes = os.urandom
988
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000989
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Subclasses supply ``auth_header`` (the request header carrying the
    credentials) and ``parent`` (the opener used to retry the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use passwd for credential lookups, or a new HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in the header named by auth_header.

        Returns the retried response for a Digest challenge and None for
        a missing or Basic challenge.  Raises HTTPError once the retry
        counter exceeds 5 and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send req with a Digest response to the challenge in auth.

        Returns None when no authorization could be computed or when the
        request already carries exactly this authorization value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce derived from random bytes."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for req from challenge chal.

        chal is the parsed challenge as a dict.  Returns None when the
        challenge lacks a realm or nonce, names an unsupported algorithm,
        or no user is registered for the realm.  Raises URLError when the
        server offers a qop that does not include "auth".
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The challenge's qop is a comma-separated list of the
            # protections the server supports (RFC 2617 s. 3.2.1); answer
            # with plain "auth" whenever it is among them.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce,
                                           'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for the named digest algorithm.

        H is None when the algorithm is not supported; the caller treats
        that as "cannot authenticate".  KD must not be called in that case.
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Return the entity digest for data; currently always None."""
        # XXX not implemented yet
        return None
1129
1130
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 401 responses using Digest authentication.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req with Digest credentials, then reset the retry counter."""
        authority = urlparse(req.full_url)[1]
        result = self.http_error_auth_reqed('www-authenticate',
                                            authority, req, headers)
        self.reset_retry_count()
        return result
1147
1148
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using Digest authentication."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req with Digest credentials, then reset the retry counter."""
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            req.host, req, headers)
        self.reset_retry_count()
        return result
1160
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and connection handling shared by HTTP(S)."""

    def __init__(self, debuglevel=0):
        """Remember debuglevel; do_open() applies it to each connection."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Change the debug level applied to subsequent connections."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the default headers of request and return it.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the parent opener's default headers, each only
        when the request does not already have that header.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable without
        an explicit Content-length.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # The ABCs live in collections.abc; the aliases in
                    # the collections namespace are gone in Python 3.10+.
                    from collections.abc import Iterable
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL, so the Host
            # header has to come from the selector, not from request.host.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Extra keyword arguments are passed to the http_class constructor.

        Raises URLError when the request has no host or when sending it
        fails with a socket error.  The connection is closed whenever
        sending the request or reading the response fails.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level given to __init__/set_http_debuglevel; without
        # this the stored value would never reach the connection.
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Do not leak the connection when either sending the request
            # or reading the response fails.
            h.close()
            raise

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with the reason phrase, because urllib clients expect the
        # response to have the reason in .msg.  It would be good to mark
        # this attribute as deprecated and get them to use info() or
        # .headers for the headers instead.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001263
1264
class HTTPHandler(AbstractHTTPHandler):
    """Open http URLs through http.client.HTTPConnection."""

    # Requests are pre-processed by the shared header-filling logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send req over a plain HTTP connection and return the response."""
        return self.do_open(http.client.HTTPConnection, req)
1271
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https URLs through http.client.HTTPSConnection."""

        # Requests are pre-processed by the shared header-filling logic.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Store the context and check_hostname passed to connections."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send req over an HTTPS connection and return the response."""
            conn_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **conn_options)

    # Only advertised when this Python build provides HTTPSConnection.
    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001288
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use cookiejar, or a new http.cookiejar.CookieJar when it is None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar() if cookiejar is None
                          else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to request; return request."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response; return response."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1306
class UnknownHandler(BaseHandler):
    """Fallback handler that rejects any URL scheme it is asked to open."""

    def unknown_open(self, req):
        """Always raise URLError naming the scheme of req."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001311
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value enclosed in double quotes has the quotes removed.  An empty
    value ("key=") is kept as the empty string.  Raises ValueError for an
    element that contains no "=".
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes: an empty value must not raise
        # IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1321
def parse_http_list(s):
    """Split a comma-separated HTTP header list into its elements.

    Follows the list syntax of RFC 2068 Section 2: an element may contain
    a double-quoted string, and commas inside such a string do not split
    the element.  Inside a quoted string a backslash escapes the next
    character; the backslash itself is dropped.  Single quotes have no
    special meaning.  Each returned element is stripped of surrounding
    whitespace.
    """
    items = []
    chars = []
    escaped = False
    in_quotes = False

    for ch in s:
        if escaped:
            # Character following a backslash: taken literally.
            chars.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                chars.append(ch)
        elif ch == ',':
            items.append(''.join(chars))
            chars = []
        else:
            if ch == '"':
                in_quotes = True
            chars.append(ch)

    # Whatever follows the last comma is the final element.
    if chars:
        items.append(''.join(chars))

    return [item.strip() for item in items]
1364
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open the local file named by req.

        A selector of the form //host/... with a host other than
        "localhost" is refused with URLError unless the host is one of
        the names returned by get_names(); in that accepted case None is
        returned so that other handlers may process the request.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison between a host
            # string and the names tuple can never succeed.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            return None
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return the cached tuple of addresses that mean "this host"."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.selector.

        Raises URLError when the file cannot be accessed, or when the
        request names a host (or a port) that is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001416
def _safe_gethostbyname(host):
    """Resolve host to an address, returning None if resolution fails."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        return None
    else:
        return address
1422
class FTPHandler(BaseHandler):
    """Open ftp URLs with a fresh, non-persistent connection per request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by req.

        Returns an addinfourl wrapping the data connection.  Raises
        URLError when the request has no host, the host cannot be
        resolved, or any ftplib error occurs.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        passwd = None
        if user:
            user, passwd = splitpasswd(user)
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, file = segments[:-1], segments[-1]
        if dirs and not dirs[0]:
            # Drop the empty segment produced by the leading "/".
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise;
            # a ";type=" URL attribute overrides the default.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if (attr.lower() == 'type' and
                        value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001480
class CacheFTPHandler(FTPHandler):
    """FTP handler that keeps connections open and reuses them."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # connection key -> ftpwrapper
        self.timeout = {}    # connection key -> expiry time (epoch seconds)
        self.soonest = 0     # earliest expiry time among cached entries
        self.delay = 60      # seconds an entry stays valid after use
        self.max_conns = 16  # cache size at which one entry is evicted

    def setTimeout(self, t):
        """Set the number of seconds a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for this target, creating it if new."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def _soonest_expiry(self):
        """Return the earliest expiry time, or 0 when nothing is cached."""
        # min() raises ValueError on an empty sequence, which happens once
        # every entry has expired or been evicted.
        if not self.timeout:
            return 0
        return min(self.timeout.values())

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._soonest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._soonest_expiry()

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1533
1534
# Code moved from the old urllib module
1536
# Trim the ftp cache beyond this size
MAXFTPCACHE = 10
1538
1539# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL into a file system path.

        OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path into the path part of a 'file' URL.

        OS-specific; not recommended for general use."""
        return quote(pathname)
else:
    # Windows uses the dedicated conversion helpers.
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001552
1553# This really consists of two pieces:
1554# (1) a class which handles opening of all sorts of URLs
1555# (plus assorted utilities etc.)
1556# (2) a set of functions for parsing URLs
1557# XXX Should these be separated out into different modules?
1558
1559
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() works even if __init__ never ran.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and default headers.

        proxies must be a mapping (scheme -> proxy URL); when None the
        result of getproxies() is used.  The keyword arguments 'key_file'
        and 'cert_file' are remembered for HTTPS connections.  Emits a
        DeprecationWarning on every instantiation.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        """Run close() when the opener is garbage collected."""
        self.close()

    def close(self):
        """Release resources held by the opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files made by retrieve() and empty tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named open_<scheme> (with '-' replaced
        by '_').  When a proxy is configured for the scheme, the proxy's
        scheme selects the method and the method receives a
        (proxyhost, fullurl) tuple instead of a string.
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            # Let HTTP errors through untouched instead of having them
            # rewrapped by the socket.error handler below.
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface for a proxy whose URL type has no opener."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, when given, is called as
        reporthook(blocknum, blocksize, totalsize) once before the first
        read and once after every block written; totalsize is -1 when no
        Content-Length header is present.  Raises ContentTooShortError
        when fewer bytes than announced were received.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a local file; fall back to self.open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Reuse the extension of the URL path for the temp file.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve (a string without the scheme), or a
          (proxy host, full url) pair when the request goes via a proxy.
        - data is payload for a POST request or None.

        Returns an addinfourl object for 2xx responses; any other status
        is handed to self.http_error().
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers registered through addheader() override the ones above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the handler declined; use the default.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using the stored key/cert."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file; a host other than localhost raises ValueError."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl object wrapping the opened file with
        Content-Type, Content-Length and Last-modified headers.  Raises
        URLError when the file cannot be stat'ed or the URL names a host
        that is not this machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by
        (user, host, port, joined dirs).  Errors from the FTP layer are
        re-raised as URLError.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            # Iterate over a snapshot of the keys: deleting entries while
            # iterating the dict itself raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file name
            # is given, binary ("image") otherwise; a ';type=' attribute
            # in the URL overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without '=' is the encoding (e.g. base64).
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002005
2006
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect handling (301, 302, 303, 307) and Basic
    authentication retries (401, 407) on top of URLopener, and returns
    a response object instead of raising for other HTTP errors.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base opener plus auth cache and redirect counter."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}   # "realm@host" -> (user, passwd)
        self.tries = 0         # redirects followed for the current request
        self.maxtries = 10     # redirect limit; a falsy value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        try:
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset the counter even when following the redirect raises;
            # otherwise a reused opener keeps a stale count and later
            # reports a bogus "Redirect Recursion".
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the 'location' (or 'uri') header of a redirect response.

        Returns None when neither header is present.  Raises HTTPError
        when the target scheme is not http, https, ftp or empty.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # URLopener.http_error_default() always raises HTTPError, so each
        # of the checks below aborts the method when it fails.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # URLopener.http_error_default() always raises HTTPError, so each
        # of the checks below aborts the method when it fails.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # get_user_passwd() then discards the cached pair for the realm.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth() for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:password embedded in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials;
        # get_user_passwd() then discards the cached pair for the realm.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:password embedded in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # See retry_http_basic_auth() for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Results are cached per "realm@host" (host lower-cased).  A true
        clear_cache drops the cached pair first and prompts again.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2215
2216
2217# Utility functions
2218
_localhost = None
def localhost():
    """Return the IP address that the name 'localhost' resolves to.

    The lookup is done once; the result is kept in the module-level
    _localhost and reused by later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2226
_thishost = None
def thishost():
    """Return a tuple with the IP addresses of the current host.

    The addresses come from resolving socket.gethostname(); the tuple
    is computed on first use and kept in the module-level _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    _thishost = tuple(addresses)
    return _thishost
2234
_ftperrors = None
def ftperrors():
    """Return the exceptions raised by the FTP class (ftplib.all_errors).

    ftplib is imported on first use only; the value is then kept in the
    module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2243
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same Message instance, parsed from an empty string on the first
    call, is handed back on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2251
2252
2253# Utility classes
2254
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and log in immediately.

        *dirs* is the sequence of directory names to cwd() into after
        login.  *persistent* becomes self.keepalive: while it is true,
        closing the last file returned by retrfile() does not close the
        FTP connection.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet closed.
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect, log in and change into the configured directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start a transfer and return (file object, retrieval length).

        *type* is the FTP transfer type; 'd' or 'D' requests a directory
        listing.  A RETR of *file* is attempted first; if the server
        answers with a 550 error (or a listing was requested) a LIST is
        issued instead.

        Raises URLError for any other permanent FTP error on RETR, and
        when *file* cannot be entered as a directory.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection is unusable: reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file"; fall through to LIST below.
                if str(reason)[:3] != '550':
                    # %s, not %d: reason is an exception, not a number.
                    raise URLError('ftp error: %s' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %s' % reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy with a transfer."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for files returned by retrfile().

        Drops one reference and closes the FTP connection when no file
        is left open and the wrapper is not being kept alive.
        """
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the underlying FTP connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2344
2345# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (matched
    case-insensitively); this seems to be the standard convention.
    Variables with an empty value are ignored.  If you need a different
    way, you can pass a proxies dictionary to the [Fancy]URLopener
    constructor.

    """
    suffix = '_proxy'
    cut = len(suffix)
    lowered = ((name.lower(), value) for name, value in os.environ.items())
    return {name[:-cut]: value
            for name, value in lowered
            if value and name.endswith(suffix)}
2361
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (NO_PROXY is
    consulted when no_proxy is unset or empty), which should be a list
    of DNS suffixes separated by commas, or '*' for all hosts.

    Returns 1 when the proxy should be bypassed, 0 otherwise.
    """
    environ = os.environ
    no_proxy = environ.get('no_proxy', '') or environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly = splitport(host)[0]
    # check if the host ends with any of the DNS suffixes; the match is
    # tried against the host both without and with its port
    suffixes = [s for s in map(str.strip, no_proxy.split(',')) if s]
    bypass = any(hostonly.endswith(s) or host.endswith(s) for s in suffixes)
    # otherwise, don't bypass
    return 1 if bypass else 0
2381
2382
Ronald Oussorene72e1612011-03-14 18:15:25 -04002383# This code tests an OSX specific data structure but is testable on all
2384# platforms
2385def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2386 """
2387 Return True iff this host shouldn't be accessed using a proxy
2388
2389 This function uses the MacOSX framework SystemConfiguration
2390 to fetch the proxy information.
2391
2392 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2393 { 'exclude_simple': bool,
2394 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2395 }
2396 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002397 from fnmatch import fnmatch
2398
2399 hostonly, port = splitport(host)
2400
2401 def ip2num(ipAddr):
2402 parts = ipAddr.split('.')
2403 parts = list(map(int, parts))
2404 if len(parts) != 4:
2405 parts = (parts + [0, 0, 0, 0])[:4]
2406 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2407
2408 # Check for simple host names:
2409 if '.' not in host:
2410 if proxy_settings['exclude_simple']:
2411 return True
2412
2413 hostIP = None
2414
2415 for value in proxy_settings.get('exceptions', ()):
2416 # Items in the list are strings like these: *.local, 169.254/16
2417 if not value: continue
2418
2419 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2420 if m is not None:
2421 if hostIP is None:
2422 try:
2423 hostIP = socket.gethostbyname(hostonly)
2424 hostIP = ip2num(hostIP)
2425 except socket.error:
2426 continue
2427
2428 base = ip2num(m.group(1))
2429 mask = m.group(2)
2430 if mask is None:
2431 mask = 8 * (m.group(1).count('.') + 1)
2432 else:
2433 mask = int(mask[1:])
2434 mask = 32 - mask
2435
2436 if (hostIP >> mask) == (base >> mask):
2437 return True
2438
2439 elif fnmatch(host, value):
2440 return True
2441
2442 return False
2443
2444
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Test *host* against the proxy settings from _get_proxy_settings()."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return a true value if *host* should bypass the proxy.

        Uses the environment settings when any <scheme>_proxy variable
        is set, and the system configuration otherwise.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with statement closes the registry key even when one of
            # the queries or the parsing below raises.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry's ProxyOverride list matches *host*.

        *host* is tested by name, by resolved IP address and by fully
        qualified name against each ';'-separated glob in ProxyOverride.
        Returns 0 when the registry cannot be read, proxying is disabled,
        or nothing matches.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            # The with statement makes sure the registry key is closed.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a true value if *host* should bypass the proxy.

        Checks the settings gathered from the environment, if specified,
        or the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment