blob: 67b4c795b36edc3af28d271c9c566f41216c09f9 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib.  Pass the URL and, optionally, data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
BaseHandler -- Base class for the handlers registered with an
OpenerDirector.
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
#   If an authentication error handler tries to perform authentication
#   but fails, how should the error be signalled?  The client needs to
#   know the HTTP error code.  But if the handler knows what the problem
#   was, e.g., that it didn't recognize the hash algorithm requested in
#   the challenge, it would be good to pass that information along to
#   the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
# Probe for SSL support: ``_have_ssl`` records whether ``import ssl`` worked.
try:
    import ssl
    _have_ssl = True
except ImportError:
    _have_ssl = False
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
# Public names exported by ``from urllib.request import *``.
__all__ = [
    # Classes
    'Request',
    'OpenerDirector',
    'BaseHandler',
    'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler',
    'HTTPCookieProcessor',
    'ProxyHandler',
    'HTTPPasswordMgr',
    'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler',
    'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler',
    'ProxyDigestAuthHandler',
    'HTTPHandler',
    'FileHandler',
    'FTPHandler',
    'CacheFTPHandler',
    'UnknownHandler',
    'HTTPErrorProcessor',
    # Functions
    'urlopen',
    'install_opener',
    'build_opener',
    'pathname2url',
    'url2pathname',
    'getproxies',
    # Legacy interface
    'urlretrieve',
    'urlcleanup',
    'URLopener',
    'FancyURLopener',
]
132
# Used in the User-Agent header sent with requests.  Built from
# sys.version_info instead of slicing sys.version, because the slice
# truncates two-digit minor versions ("3.10.x" would become "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
135
# Module-wide default opener; created lazily by urlopen() or set explicitly
# through install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the selected opener's open() returns.

    url -- a URL string or a Request object; passed to opener.open().
    data -- optional request data; passed to opener.open().
    timeout -- passed to opener.open().
    cafile, capath, cadefault -- if any of them is true, a one-off opener
        is built around an HTTPSHandler whose SSL context requires a
        certificate (CERT_REQUIRED) and which checks the host name.  The CA
        certificates come from cafile/capath when either is given,
        otherwise from the default verify paths.

    When none of the CA arguments is given, the module-wide opener is
    used; it is created with build_opener() on first use.

    Raises ValueError if CA arguments are given but SSL support is not
    available.
    """
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        # Reaching this branch already means verification was requested,
        # so certificates are always required and host names always checked.
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        # This opener is deliberately not cached in _opener.
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000161
def install_opener(opener):
    """Store *opener* in the module-level ``_opener`` global.

    urlopen() uses that global whenever it is called without any of the
    CA-related arguments.
    """
    global _opener
    _opener = opener
165
# Paths of temporary files created by urlretrieve(); urlcleanup() removes them.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* into a file on disk.

    url -- the URL to fetch (required).
    filename -- destination path; when omitted, a named temporary file is
        created and remembered so that urlcleanup() can delete it.
    reporthook -- optional callable invoked as
        reporthook(block_number, bytes_in_block, total_size), once before
        the first read (with 0 bytes) and once after every block written.
        total_size is -1 when the response has no Content-Length header.
    data -- passed through to urlopen().

    For a "file" URL without *filename*, nothing is copied: the normalized
    local path is returned directly.

    Returns a (path, headers) tuple.  Raises ContentTooShortError when
    fewer bytes than announced by Content-Length were received.
    """
    url_type, local_path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as source:
        headers = source.info()

        # A local file needs no copy unless a destination was requested.
        if url_type == "file" and not filename:
            return os.path.normpath(local_path), headers

        # Choose the destination: the caller's file or a fresh temp file.
        if filename:
            sink = open(filename, 'wb')
        else:
            sink = tempfile.NamedTemporaryFile(delete=False)
            filename = sink.name
            _url_tempfiles.append(filename)

        with sink:
            result = filename, headers
            chunk_size = 1024*8
            received = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            else:
                size = -1

            if reporthook:
                reporthook(blocknum, 0, size)

            while True:
                chunk = source.read(chunk_size)
                if not chunk:
                    break
                received += len(chunk)
                sink.write(chunk)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, len(chunk), size)

    # Checked only after both files are closed.
    if size >= 0 and received < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000229
def urlcleanup():
    """Delete the temporary files recorded by urlretrieve() and reset the opener.

    Files that cannot be removed are silently skipped.  The list of
    recorded files is emptied in place, and a truthy module-level opener
    is reset to None.
    """
    global _opener

    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # Best effort: the file may already be gone.
            pass

    # Empty the shared list in place.
    _url_tempfiles[:] = []
    if _opener:
        _opener = None
241
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is the network-location part of ``request.full_url``; when
    that part is empty, the request's "Host" header is used instead.  A
    trailing ":port" is removed.

    Variation from the RFC: the returned value is lowercased, for
    convenient comparison.

    """
    netloc = urlparse(request.full_url)[1]
    if netloc == "":
        host = request.get_header("Host", "")
    else:
        host = netloc

    # Drop the port, if present, then normalize the case.
    return _cut_port_re.sub("", host, 1).lower()
259
class Request:
    """Encapsulates the state of a single URL request.

    Attributes set by the constructor:
      full_url         -- the URL without its fragment
      fragment         -- the part after '#', as returned by splittag()
      data             -- request data, or None
      headers          -- dict of headers (keys stored via str.capitalize())
      unredirected_hdrs -- dict of headers not copied to redirected requests
      origin_req_host  -- given value, or request_host(self) when None
      unverifiable     -- flag passed by the caller
      method           -- explicit HTTP method, or None
      type, host, selector -- pieces of full_url computed by _parse()
    """

    # NOTE: the shared ``headers={}`` default is only iterated, never
    # mutated, so it is safe; it is kept to leave the signature unchanged.
    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method.

        An explicit ``method`` wins; otherwise "POST" when data is set
        and "GET" when it is not.
        """
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with '#fragment' re-attached when there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each of these warns with stacklevel=2 so that the DeprecationWarning
    # is attributed to the caller's line rather than to this module.

    def add_data(self, data):
        """Deprecated: assign to the ``data`` attribute instead."""
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        """Deprecated: test ``data is not None`` instead."""
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        """Deprecated: read the ``data`` attribute instead."""
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        """Deprecated: read the ``type`` attribute instead."""
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        """Deprecated: read the ``host`` attribute instead."""
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        """Deprecated: read the ``selector`` attribute instead."""
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        """Deprecated: read the ``unverifiable`` attribute instead."""
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        """Deprecated: read the ``origin_req_host`` attribute instead."""
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Redirect this request to the proxy at *host*.

        For an https request without a tunnel host yet, the original host
        is remembered in ``_tunnel_host`` and type/selector are left alone.
        Otherwise the request takes the proxy's *type* and the selector
        becomes the full URL.  In both cases ``host`` becomes *host*.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Set header *key* (stored as key.capitalize()) to *val*."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that is kept separate from ``headers``."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is in either header dict.

        The name is looked up as given (no case normalization).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring ``headers`` over the
        unredirected headers, or *default* when it is in neither."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return a list of (name, value) pairs from both header dicts.

        On a name clash the entry from ``headers`` wins.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
380
class OpenerDirector:
    """Dispatches requests to registered handlers.

    Handlers are indexed by the names of their methods:
      <protocol>_open         -> handle_open[protocol]
      <protocol>_error_<kind> -> handle_error[protocol][kind]
      <protocol>_request      -> process_request[protocol]
      <protocol>_response     -> process_response[protocol]
    Each index entry is a list kept in sorted order with bisect.insort.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every method name it provides.

        Raises TypeError if *handler* has no ``add_parent`` attribute.
        A handler that provides no recognized method is not registered.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            protocol, sep, condition = meth.partition("_")
            if not sep:
                # A name without an underscore has no "<protocol>_" prefix.
                # Without this check a method called e.g. "open" would be
                # registered under the bogus protocol "ope".
                continue

            if condition.startswith("error"):
                # "error_404" -> 404, "error_default" -> "default"
                kind = condition[condition.find("_") + 1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            bisect.insort(lookup.setdefault(kind, []), handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        """Do nothing; only exists for backwards compatibility."""
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in chain[kind] until one answers.

        Returns the first result that is not None, or None when every
        handler declined or no handler is registered for *kind*.
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request-like object).

        Runs the <protocol>_request processors, dispatches through _open(),
        then runs the <protocol>_response processors and returns the final
        response.  *timeout* is stored on the request as ``req.timeout``.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try default_open, then <protocol>_open, then unknown_open.

        Returns the first truthy result; the result of the unknown_open
        chain is returned as is.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http' and 'https' the chain is selected by the status code
        found in args[2], with a fall-back to the http_error_default
        handlers when no handler produced a truthy result.  For any other
        protocol the <proto>_error handlers are called.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
            orig_args = args
        else:
            table = self.handle_error
            meth_name = proto + '_error'
            http_err = False
        result = self._call_chain(table, proto, meth_name, *args)
        if result:
            return result

        if http_err:
            return self._call_chain(table, 'default', 'http_error_default',
                                    *orig_args)
518
519# XXX probably also want an abstract factory that knows when it makes
520# sense to skip a superclass in favor of a subclass and when it might
521# make sense to include both
522
def build_opener(*handlers):
    """Return an OpenerDirector populated with default and custom handlers.

    Each argument may be a handler instance or a handler class (classes
    are instantiated without arguments).  The default handler classes are
    added first, followed by the given handlers in argument order.  The
    HTTPS handler is part of the defaults only when http.client provides
    HTTPSConnection.

    A default handler class is left out when one of the arguments is an
    instance or a subclass of it.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Collect the defaults that a caller-supplied handler replaces.
    replaced = set()
    for candidate in handlers:
        matches = issubclass if isclass(candidate) else isinstance
        replaced.update([klass for klass in default_classes
                         if matches(candidate, klass)])

    for klass in [k for k in default_classes if k not in replaced]:
        opener.add_handler(klass())

    for candidate in handlers:
        instance = candidate() if isclass(candidate) else candidate
        opener.add_handler(instance)
    return opener
560
class BaseHandler:
    """Base class for handlers.

    ``handler_order`` is the key compared by ``__lt__``: a handler with a
    smaller value sorts before one with a larger value.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as ``self.parent``."""
        self.parent = parent

    def close(self):
        """Do nothing; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Order handlers by ``handler_order``.

        An *other* without ``handler_order`` always compares greater.
        This tries to preserve the old behavior of having custom classes
        inserted after the default ones (it works only for custom user
        classes which are not aware of handler_order).
        """
        try:
            other_order = other.handler_order
        except AttributeError:
            return True
        return self.handler_order < other_order
578
579
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* for a 2xx status, else the parent's error result.

        Any status outside 200..299 is handed to ``self.parent.error()``
        together with the request, the response, its message and headers.
        """
        status = response.code
        reason = response.msg
        headers = response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= status < 300:
            return response

        return self.parent.error(
            'http', request, response, status, reason, headers)

    https_response = http_response
596
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback error handler that turns an HTTP error into an exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError built from the request URL and the response."""
        failed_url = req.full_url
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000600
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no data, so headers describing the old
        # body are dropped.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the target URL.

        Returns the response of the redirected request, or None when the
        response names no target or redirect_request() declines.  Raises
        HTTPError for a disallowed target scheme or a detected loop.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # Only force a "/" path when the target names a host.  A relative
        # reference with an empty path (e.g. "?page=2") must stay relative
        # so that urljoin() resolves it against the current document
        # instead of the server root.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
703
704
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL form: an authority needs two slashes after the scheme.
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'; keep only what
        # precedes that path.
        authority = remainder[2:].partition("/")[0]
    else:
        # Bare authority: whatever splittype() took for a scheme is not one.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        return scheme, None, None, hostport
    user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
776
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given in a scheme -> URL mapping."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one ``<scheme>_open`` method per entry of *proxies*.

        When *proxies* is None the mapping returned by getproxies() is
        used.  Each installed method forwards to proxy_open() with the
        proxy URL and scheme of its entry.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Defaults bind the current loop values to each lambda.
            opener_method = (
                lambda r, proxy=proxy_url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener_method)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, re-opening it if the scheme changes.

        Returns None when the host is bypassed (proxy_bypass) or when
        other handlers can carry on with the modified request; otherwise
        returns the result of re-opening the request through the parent.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)

        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000818
class HTTPPasswordMgr:
    """Keep (user, password) pairs indexed by realm and by URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of URIs.

        Each URI is stored twice in reduced form: once with the scheme's
        default port filled in and once exactly as given.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        Returns (None, None) when no stored URI is equal to, or a parent
        of, *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        pairs as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A plain common-prefix test would
        # treat "/foobar" as being below "/foo" and hand the credentials
        # to an unrelated resource.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
881
882
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the credentials of realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm*, then in the default (None) realm."""
        credentials = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        if credentials[0] is None:
            # Nothing registered for this realm: try the catch-all entry.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        found_user, found_password = credentials
        return found_user, found_password
891
892
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to re-send
    the request).
    """

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or on a comma so
    # that it runs in linear time; an unanchored "(?:.*,)*" prefix
    # backtracks catastrophically on headers made of many commas.
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for lookups, or a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Reset the counter of consecutive authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in the *authreq* header of *headers*.

        Returns the response of the retried request, or None when there is
        no usable Basic challenge or no credentials for it.

        Raises HTTPError after more than five consecutive attempts and
        ValueError when the header starts with a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if not authreq:
            return None
        tokens = authreq.split()
        if not tokens:
            # Header made of whitespace only: nothing to answer.
            return None
        scheme = tokens[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        # Use the first Basic challenge that carries a realm; challenges
        # for other schemes in the same header are skipped.
        for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
            scheme, quote, realm = mo.groups()
            if scheme.lower() != 'basic':
                continue
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 2)
            response = self.retry_http_basic_auth(host, req, realm)
            if response and response.code != 401:
                self.retried = 0
            return response
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with the credentials stored for *realm* and *host*.

        Returns None when no password is known, or when the request
        already carries exactly these credentials (so sending them again
        cannot succeed).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # The header is stored as an unredirected header below, so it
            # must be looked up with get_header(); req.headers alone never
            # contains it.
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
960
961
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from servers with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up by its full URL."""
        outcome = self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
        # The challenge has been dealt with; start counting afresh.
        self.reset_retry_count()
        return outcome
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000972
973
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from proxies with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up by the request's host."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        outcome = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        # The challenge has been dealt with; start counting afresh.
        self.reset_retry_count()
        return outcome
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000988
989
# Return n random bytes.
# Alias of os.urandom: _randombytes(n) yields n bytes from the operating
# system's randomness source.
_randombytes = os.urandom
992
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000993
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to re-send
    the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Reset the counter of consecutive authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in the *auth_header* of *headers*.

        Returns the response of the retried request for a Digest
        challenge and None for a missing or Basic challenge.  Raises
        HTTPError after more than five consecutive attempts and ValueError
        for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with a Digest answer to the challenge text *auth*.

        Returns None when the request already carries the very same
        header value, and also (implicitly) when no answer can be built.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16 hex digit client nonce for the server *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the credentials part of a Digest header for *req*.

        chal -- dict of challenge parameters; 'realm' and 'nonce' are
        required.

        Returns None when a required parameter is missing, the algorithm
        is not supported or no user is known for the realm.  Raises
        URLError when the server offers a qop list without 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The challenge lists the qop values the server accepts
            # (e.g. "auth,auth-int"); "auth" is the one implemented here.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce,
                                           'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        Returns (None, None) for an algorithm that is not implemented, so
        that callers can test ``H is None``.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Return the entity digest for *data*; currently always None."""
        # XXX not implemented yet
        return None
1132 return None
1133
1134
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a server's 401 challenge for *req*."""
        # Element 1 of the parse result is the network location.
        netloc = urlparse(req.full_url)[1]
        outcome = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        self.reset_retry_count()
        return outcome
1151
1152
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from proxies with Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a proxy's 407 challenge for *req*."""
        outcome = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return outcome
1164
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and connection handling shared by HTTP(S)."""

    def __init__(self, debuglevel=0):
        """Remember *debuglevel*; do_open() applies it to each connection."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Change the debug level used for connections opened from now on."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers an HTTP request needs and return *request*.

        Adds Content-type and Content-length for requests with data, the
        Host header, and the parent opener's default headers; headers
        already present are left untouched.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # The ABCs live in collections.abc; the aliases in
                    # the collections namespace are deprecated.
                    from collections.abc import Iterable
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # The selector is the full URL when going through a proxy, so
            # the Host header must name the origin server found in it.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.

        Raises URLError when the request has no host or when sending the
        request fails with a socket error.  The connection is closed
        before any exception from sending or from reading the response
        propagates.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level given to __init__() / set_http_debuglevel().
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.selector, req.data, headers)
        except socket.error as err:  # timeout error
            h.close()
            raise URLError(err)
        try:
            r = h.getresponse()
        except BaseException:
            # Do not leave the connection open when no response object
            # exists to take ownership of it.
            h.close()
            raise

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001267
1268
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs over http.client.HTTPConnection."""

    def http_open(self, req):
        """Return the response for *req*."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)

    # Requests are prepared by the shared implementation.
    http_request = AbstractHTTPHandler.do_request_
1275
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs over http.client.HTTPSConnection."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Store the options passed on to every HTTPS connection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Return the response for *req*."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

        # Requests are prepared by the shared implementation.
        https_request = AbstractHTTPHandler.do_request_

    # Only exported when http.client provides HTTPSConnection.
    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001292
class HTTPCookieProcessor(BaseHandler):
    """Add cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        # Compare against None explicitly: the test must not depend on the
        # truth value of the jar that was passed in.
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Add the Cookie header for *request* and return the request."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Record the cookies set by *response* and return the response."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
1310
class UnknownHandler(BaseHandler):
    """Reject requests whose URL scheme was not handled."""

    def unknown_open(self, req):
        """Always raise URLError naming the scheme of *req*."""
        message = 'unknown url type: %s' % req.type
        raise URLError(message)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001315
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value enclosed in double quotes is returned without the quotes; an
    empty value (``key=``) is returned as the empty string.  Raises
    ValueError for an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes: an empty value must not raise
        # IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1325
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    buf = []            # characters of the element being collected
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character after a backslash inside quotes: taken literally.
            buf.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                # The backslash itself is dropped from the output.
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                buf.append(ch)
        elif ch == ',':
            # An unquoted comma ends the current element, even an empty one.
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quotes = True
            buf.append(ch)

    # append last part
    if buf:
        items.append(''.join(buf))

    return [item.strip() for item in items]
1368
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open the local file named by *req*.

        Raises URLError when the URL names a host that is not one of the
        local addresses.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison between the host
            # string and the tuple of names could never succeed, which
            # made every such URL fail.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): nothing is opened on this path; None is
            # returned, which leaves the request to other handlers.
            # Confirm that this is intended.
            return None
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return the tuple of addresses of this machine.

        The tuple is computed once and stored on the class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The response carries Content-type, Content-length and
        Last-modified headers.  Raises URLError when the file cannot be
        accessed or when the host of the request is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001420
def _safe_gethostbyname(host):
    """Return the address of *host*, or None when it cannot be resolved."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        address = None
    return address
1426
class FTPHandler(BaseHandler):
    """Open ftp: URLs with a new connection for every request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they are known.  Raises URLError when the
        request has no host, the host cannot be resolved or an FTP error
        occurs.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        passwd = None
        if user:
            user, passwd = splitpasswd(user)
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, filename = segments[:-1], segments[-1]
        if dirs and not dirs[0]:
            # Drop the empty segment that precedes the leading '/'.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise.
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(filename, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as msg:
            exc = URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001484
class CacheFTPHandler(FTPHandler):
    """FTP handler that keeps connections open for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> expiry time (seconds since the epoch)
        self.soonest = 0     # earliest expiry time, 0 when nothing cached
        self.delay = 60      # lifetime in seconds granted on each use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the lifetime, in seconds, granted to a connection on use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest connection is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for the endpoint, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        conn = self.cache.get(key)
        if conn is None:
            conn = self.cache[key] = ftpwrapper(user, passwd, host, port,
                                                dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        # Return the local reference: check_cache() may have evicted the
        # entry, in which case self.cache[key] would raise KeyError.
        return conn

    def _soonest_expiry(self):
        # Earliest expiry among the cached connections, 0 when none is left.
        if not self.timeout:
            return 0
        return min(self.timeout.values())

    def check_cache(self):
        """Close expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._soonest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection as the expiry branch
                    # does, instead of merely dropping the reference.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._soonest_expiry()

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1537
1538
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems: on Windows the conversions come from
# nturl2path, everywhere else they are plain (un)quoting.
if os.name != 'nt':
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        local_path = unquote(pathname)
        return local_path

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        url_path = quote(pathname)
        return url_path
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001556
1557# This really consists of two pieces:
1558# (1) a class which handles opening of all sorts of URLs
1559# (plus assorted utilities etc.)
1560# (2) a set of functions for parsing URLs
1561# XXX Should these be separated out into different modules?
1562
1563
ftpcache = {}
class URLopener:
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level fallback: cleanup() (reached via __del__) reads this
    # attribute even when __init__ never created the per-instance list.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and caches.

        proxies -- mapping of URL scheme to proxy URL; getproxies() is
                   used when it is None.
        x509 -- optional 'key_file' and 'cert_file' keywords, passed to
                the HTTPS connection when one is made.

        Emits a DeprecationWarning on every instantiation.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release resources held by the opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files made by retrieve() and empty the
        temp cache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        The URL scheme selects the open_<scheme>() method to call ('-' in
        the scheme becomes '_'); a URL without a scheme is treated as
        'file'.  When a proxy is configured for the scheme, the proxy's
        scheme is used instead and the handler receives a
        (proxy host, full URL) pair.  Socket errors are re-raised as
        IOError('socket error', ...).
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            # Listed first so it is never rewrapped by the clause below.
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, _ = splittype(fullurl)
        raise IOError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL through a proxy whose own
        URL type is unknown."""
        urltype, _ = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        filename -- where to store the data; a temporary file (removed by
                    cleanup()) is created when omitted.
        reporthook -- optional callable invoked as
                      reporthook(blocknum, blocksize, totalsize) once
                      before the first read and after every block;
                      totalsize is -1 without a Content-Length header.
        data -- passed through to open().

        Raises ContentTooShortError when fewer bytes than announced by
        Content-Length were received.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a plain local file; fall through to the
                # generic download path below.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                # Keep the URL's extension on the temporary file.
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: open() passed a (proxy host, full URL) pair.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers registered with addheader() override the ones set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A false result means the handler declined; use the default.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using the opener's key
            and certificate files."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl object with synthesized Content-Type,
        Content-Length and Last-modified headers.  Raises URLError when
        the file cannot be stat()ed or the URL names a host that is not
        this machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # URLError accepts (reason, filename) only; passing errno as
            # an extra first argument would raise TypeError instead.
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # localhost() returns a single address string while thishost()
        # returns a tuple, so wrap the former before concatenating.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache keyed by
        (user, host address, port, directory path).
        """
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, filename = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a snapshot
            # of the keys: deleting from a dict while iterating over it
            # raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file name
            # was given, binary ("image") otherwise.
            if not filename: ftptype = 'D'
            else: ftptype = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    ftptype = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(filename, ftptype)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";xxx" without "=" is the encoding, not a parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002009
2010
class FancyURLopener(URLopener):
    """URLopener variant that tries to recover from some HTTP errors.

    Redirects (301, 302, 303, 307) are followed and 401/407 challenges
    using Basic authentication can be retried with credentials.  Errors
    without a specific handler come back as a response object instead of
    an exception."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Redirect bookkeeping: limit and count of consecutive redirects.
        self.maxtries = 10
        self.tries = 0
        # (user, passwd) pairs keyed by "realm@host".
        self.auth_cache = {}

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        full_url = "http:" + url
        return addinfourl(fp, headers, full_url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if not (self.maxtries and self.tries >= self.maxtries):
            outcome = self.redirect_internal(url, fp, errcode, errmsg,
                                             headers, data)
            self.tries = 0
            return outcome
        # Redirect limit reached: report it as a 500 through the most
        # specific handler available.
        if hasattr(self, "http_error_500"):
            handler = self.http_error_500
        else:
            handler = self.http_error_default
        self.tries = 0
        return handler(url, fp, 500,
                       "Internal Server Error: Redirect Recursion", headers)

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the 'location' (or 'uri')
        header; return None when neither header is present."""
        for field in ('location', 'uri'):
            if field in headers:
                target = headers[field]
                break
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        base = self.type + ":" + url
        target = urljoin(base, target)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        scheme = urlparse(target).scheme
        if scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(target, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % target,
                            headers, fp)

        return self.open(target)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each URLopener.http_error_default() call below closes fp and
        # raises HTTPError, so it ends the method.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['www-authenticate']
        parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not parsed:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = parsed.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        retry_name = 'retry_' + self.type + '_basic_auth'
        if data is not None:
            return getattr(self, retry_name)(url, realm, data)
        return getattr(self, retry_name)(url, realm)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Each URLopener.http_error_default() call below closes fp and
        # raises HTTPError, so it ends the method.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['proxy-authenticate']
        parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not parsed:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = parsed.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        retry_name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is not None:
            return getattr(self, retry_name)(url, realm, data)
        return getattr(self, retry_name)(url, realm)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Store credentials in the 'http' proxy URL and reopen url."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        urltype, proxyhost = splittype(self.proxies['http'])
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero when the proxy URL already carried credentials; used
        # below as the clear_cache flag.
        at_end = proxyhost.find('@') + 1
        proxyhost = proxyhost[at_end:]
        user, passwd = self.get_user_passwd(proxyhost, realm, at_end)
        if not user and not passwd:
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is not None:
            return self.open(newurl, data)
        return self.open(newurl)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Store credentials in the 'https' proxy URL and reopen url."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        urltype, proxyhost = splittype(self.proxies['https'])
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero when the proxy URL already carried credentials; used
        # below as the clear_cache flag.
        at_end = proxyhost.find('@') + 1
        proxyhost = proxyhost[at_end:]
        user, passwd = self.get_user_passwd(proxyhost, realm, at_end)
        if not user and not passwd:
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is not None:
            return self.open(newurl, data)
        return self.open(newurl)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Reopen url over http with credentials embedded in the host."""
        host, selector = splithost(url)
        # Non-zero when the URL already carried credentials; used below
        # as the clear_cache flag.
        at_end = host.find('@') + 1
        host = host[at_end:]
        user, passwd = self.get_user_passwd(host, realm, at_end)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is not None:
            return self.open(newurl, data)
        return self.open(newurl)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Reopen url over https with credentials embedded in the host."""
        host, selector = splithost(url)
        # Non-zero when the URL already carried credentials; used below
        # as the clear_cache flag.
        at_end = host.find('@') + 1
        host = host[at_end:]
        user, passwd = self.get_user_passwd(host, realm, at_end)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is not None:
            return self.open(newurl, data)
        return self.open(newurl)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting when nothing
        is cached or when clear_cache is true."""
        cache_key = realm + '@' + host.lower()
        if cache_key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[cache_key]
            del self.auth_cache[cache_key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[cache_key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
2219
2220
2221# Utility functions
2222
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the cached address."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2230
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The result is a tuple of address strings, looked up once and cached.
    When the machine's own host name cannot be resolved, the addresses of
    'localhost' are used instead of letting socket.gaierror escape.
    """
    global _thishost
    if _thishost is None:
        try:
            _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
        except socket.gaierror:
            # The configured host name has no address record.
            _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    return _thishost
2238
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is cached."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2247
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same object is returned by every call."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2255
2256
2257# Utility classes
2258
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    An instance owns one ftplib.FTP control connection.  retrfile() hands
    out file objects for data transfers; `refcount` counts the file
    objects handed out and not yet closed, and `keepalive` records whether
    the control connection should stay open once that count drops to zero.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and open the connection.

        dirs is the sequence of directories to change into after login.
        Any exception raised while connecting, logging in or changing
        directory is propagated to the caller.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Do not leak the control connection when connect(), login()
            # or cwd() fails half-way through the set-up.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change into the configured directories."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start retrieving *file* and return (file object, length).

        type is the FTP transfer type letter; 'd' or 'D' requests a
        directory listing.  If the RETR command fails with a 550 reply,
        or file is empty, a LIST is issued instead.  Other permanent FTP
        errors are re-raised as URLError.  The length is whatever
        ntransfercmd() reported for the transfer.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = True
        else:
            cmd = 'TYPE ' + type
            isdir = False
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed on the existing connection: reconnect
            # once and retry; a second failure propagates.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error', reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # The file object created by makefile() is what the caller reads
        # from; the socket object itself is no longer needed here.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Finish a pending transfer by reading the server's final reply."""
        import ftplib
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftplib.all_errors:
            # Best effort: an error reply at this point is ignored.
            pass

    def close(self):
        """Request shutdown; the connection closes once no file is open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the control connection, ignoring FTP errors."""
        import ftplib
        self.endtransfer()
        try:
            self.ftp.close()
        except ftplib.all_errors:
            pass
2354
2355# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively and variables with an
    empty value are ignored.  When the same scheme is configured by both
    an all-lowercase variable and a differently-cased one (http_proxy and
    HTTP_PROXY), the all-lowercase variable wins regardless of the order
    in which the environment is iterated.

    If REQUEST_METHOD is set, which usually means the process runs as a
    CGI script, an 'http' entry that came from a non-lowercase variable
    is dropped: in CGI a client-supplied "Proxy:" request header is
    exposed to the script as HTTP_PROXY.
    """
    proxies = {}
    lowercase_entries = []
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered[-6:] != '_proxy':
            continue
        scheme = lowered[:-6]
        if name == lowered:
            # Applied last so that it overrides any other spelling.
            lowercase_entries.append((scheme, value))
        else:
            proxies[scheme] = value
    if 'REQUEST_METHOD' in os.environ:
        # Only entries from non-lowercase variables are in proxies here.
        proxies.pop('http', None)
    proxies.update(lowercase_entries)
    return proxies
2371
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY
    when no_proxy is unset or empty), which should be a list of DNS
    suffixes separated by commas, or '*' for all hosts.

    host may include a ':port' part; each suffix is compared against the
    host both with and without it.  Matching is case-insensitive and
    respects domain-label boundaries: the entry 'example.com' (or
    '.example.com') matches 'example.com' and 'www.example.com' but not
    'notexample.com'.

    Returns True if the proxy should be bypassed, False otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return True
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for entry in no_proxy.split(','):
        # Leading dots are optional: '.example.com' == 'example.com'.
        name = entry.strip().lstrip('.').lower()
        if not name:
            continue
        if name in (hostonly, host):
            return True
        # Require a label boundary so 'example.com' cannot match
        # 'notexample.com'.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    # otherwise, don't bypass
    return False
2391
2392
Ronald Oussorene72e1612011-03-14 18:15:25 -04002393# This code tests an OSX specific data structure but is testable on all
2394# platforms
2395def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2396 """
2397 Return True iff this host shouldn't be accessed using a proxy
2398
2399 This function uses the MacOSX framework SystemConfiguration
2400 to fetch the proxy information.
2401
2402 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2403 { 'exclude_simple': bool,
2404 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2405 }
2406 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002407 from fnmatch import fnmatch
2408
2409 hostonly, port = splitport(host)
2410
2411 def ip2num(ipAddr):
2412 parts = ipAddr.split('.')
2413 parts = list(map(int, parts))
2414 if len(parts) != 4:
2415 parts = (parts + [0, 0, 0, 0])[:4]
2416 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2417
2418 # Check for simple host names:
2419 if '.' not in host:
2420 if proxy_settings['exclude_simple']:
2421 return True
2422
2423 hostIP = None
2424
2425 for value in proxy_settings.get('exceptions', ()):
2426 # Items in the list are strings like these: *.local, 169.254/16
2427 if not value: continue
2428
2429 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2430 if m is not None:
2431 if hostIP is None:
2432 try:
2433 hostIP = socket.gethostbyname(hostonly)
2434 hostIP = ip2num(hostIP)
2435 except socket.error:
2436 continue
2437
2438 base = ip2num(m.group(1))
2439 mask = m.group(2)
2440 if mask is None:
2441 mask = 8 * (m.group(1).count('.') + 1)
2442 else:
2443 mask = int(mask[1:])
2444 mask = 32 - mask
2445
2446 if (hostIP >> mask) == (base >> mask):
2447 return True
2448
2449 elif fnmatch(host, value):
2450 return True
2451
2452 return False
2453
2454
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True iff host should bypass the proxy.

        The decision is made by _proxy_bypass_macosx_sysconf() using the
        settings returned by _scproxy._get_proxy_settings().
        """
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment settings if any proxy is configured there,
        otherwise the system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty dictionary is returned when winreg is unavailable, the
        proxy is disabled, or the registry values cannot be read.
        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with-statement closes the key handle on every path,
            # including when one of the queries below raises.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies may be empty or partially filled at this point;
            # return whatever was gathered.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test if the registry says host should bypass the proxy.

        Each ';'-separated entry of the ProxyOverride value is matched as
        a whole, case-insensitively, as a glob pattern ('*', '?') against
        the host name, its resolved address and its fully qualified name.
        The special entry '<local>' matches host names without a dot.
        Empty entries are ignored.  Returns True or False.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return False
        try:
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                    ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except WindowsError:
            return False
        if not proxyEnable or not proxyOverride:
            return False
        from fnmatch import fnmatchcase
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        hosts = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                hosts.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                hosts.append(fqdn)
        except socket.error:
            pass
        candidates = [name.lower() for name in hosts]
        # now check if we match one of the registry values.
        for test in proxyOverride.split(';'):
            test = test.strip()
            if not test:
                # e.g. a trailing ';' - must not match every host.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return True
                continue
            # Whole-string glob match; unlike an unanchored regular
            # expression built from the entry, 'intranet' cannot match
            # 'intranet.evil.example' and regex metacharacters in the
            # entry are taken literally.
            pattern = test.lower()
            for val in candidates:
                if fnmatchcase(val, pattern):
                    return True
        return False

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment settings if any proxy is configured there,
        otherwise the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment