blob: 3896aa0ecf9caf03cfb7ca5a22d9d6d894ac4f5d [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib: pass the URL and, optionally, data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
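BaseHandler -- Base class for handlers; it records the parent
OpenerDirector and orders handlers by their handler_order attribute.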
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
109# check for SSL
110try:
111 import ssl
Senthil Kumaranc2958622010-11-22 04:48:26 +0000112except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 _have_ssl = False
114else:
115 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800117__all__ = [
118 # Classes
119 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
120 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
121 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
122 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
123 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
124 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
125 'UnknownHandler', 'HTTPErrorProcessor',
126 # Functions
127 'urlopen', 'install_opener', 'build_opener',
128 'pathname2url', 'url2pathname', 'getproxies',
129 # Legacy interface
130 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
131]
132
# used in User-Agent header sent
# Built from sys.version_info rather than sys.version[:3]: the slice keeps
# only three characters, so a two-digit minor version such as "3.10" would
# be reported as "3.1".
__version__ = '%d.%d' % sys.version_info[:2]
135
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the selected opener's open() returns.

    url, data and timeout are passed straight through to the opener's
    open() method.

    If any of the keyword-only arguments cafile, capath or cadefault is
    true, a one-off opener is built around an HTTPSHandler whose SSL
    context requires certificate verification and host name checking;
    the installed default opener is neither used nor modified.  The
    certificates come from cafile/capath when either is given, otherwise
    from the default verify paths.

    Otherwise the module-level default opener is used; it is created with
    build_opener() on first use.

    Raises ValueError if certificate options are given but the ssl module
    could not be imported.
    """
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        # Reaching this branch already means verification was requested, so
        # certificates are always required and the host name always checked.
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000161
def install_opener(opener):
    """Make *opener* the module-wide default opener.

    The object is stored in the module-level ``_opener`` variable without
    any validation.
    """
    global _opener
    _opener = opener
165
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* to a file on disk.

    url is required.  When filename is given the data is written there;
    otherwise a named temporary file is created and its path is recorded
    in _url_tempfiles.  reporthook, if supplied, is called with the block
    number, the size of the block just read and the total size (-1 when
    no Content-Length header is present).  data is forwarded to urlopen().

    For a file: URL with no filename, nothing is copied: the normalized
    local path is returned together with the headers.

    Returns a (filename, headers) tuple.  Raises ContentTooShortError
    when fewer bytes than announced by Content-Length were received.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # A local file with no explicit destination needs no copy at all.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: the caller's file or a tracked temp file.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            read = 0
            blocknum = 0

            # Initial progress callback before any data has been read.
            if reporthook:
                reporthook(blocknum, 0, size)

            block = fp.read(bs)
            while block:
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, len(block), size)
                block = fp.read(bs)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000229
def urlcleanup():
    """Delete the temporary files recorded in _url_tempfiles and reset state.

    Files that cannot be removed are silently skipped (best effort).
    Afterwards the list is emptied in place and a truthy default opener
    is reset to None.
    """
    global _opener

    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except EnvironmentError:
            # Already gone or not removable: nothing more to do for it.
            pass
    _url_tempfiles.clear()

    if _opener:
        _opener = None
241
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is taken from the network-location part of request.full_url;
    if that is empty, the request's "Host" header is used instead.  A
    trailing ":port" is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    without_port = _cut_port_re.sub("", host, 1)
    return without_port.lower()
259
class Request:
    """Encapsulate the state of a single URL request.

    Attributes set by the constructor:

    full_url           the quoted URL with any fragment removed
    fragment           the fragment split off from the URL (may be None)
    type, host,        scheme, authority and remainder of the URL, as
      selector         computed by _parse()
    data               request body; its presence selects POST by default
    headers            regular headers, keys normalized by capitalize()
    unredirected_hdrs  headers that are not copied to a redirected request
    origin_req_host    request-host of the originating transaction
    unverifiable       unverifiable flag as passed by the caller
    method             explicit HTTP method, or None to derive it
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Build a request for *url*.

        Raises ValueError (from _parse) if the URL has no scheme.
        """
        # The shared ``headers`` default is only iterated, never mutated.
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(to_bytes(url))
        self.full_url = quote(self.full_url, safe="%/:=&?~#+!$,;'@()*[]|")
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector."""
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached if there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each warning uses stacklevel=2 so that it is attributed to the code
    # calling the deprecated method rather than to this module.

    def add_data(self, data):
        """Deprecated: assign to the ``data`` attribute instead."""
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        """Deprecated: test ``data is not None`` instead."""
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        """Deprecated: read the ``data`` attribute instead."""
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        """Deprecated: read the ``type`` attribute instead."""
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        """Deprecated: read the ``host`` attribute instead."""
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        """Deprecated: read the ``selector`` attribute instead."""
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        """Deprecated: read the ``unverifiable`` attribute instead."""
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        """Deprecated: read the ``origin_req_host`` attribute instead."""
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Route the request through the proxy at *host*.

        The first time an https request is proxied, the original host is
        remembered in _tunnel_host and the type and selector are left
        alone.  In every other case the type becomes *type* and the
        selector becomes the full URL.  The host is always replaced.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector equals the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Set a regular header; the key is normalized with capitalize()."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that is kept separate from the regular headers."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set in either header table.

        The name is looked up as given (no capitalization is applied).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value, preferring regular over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
381
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are indexed by the names of their methods:
    <protocol>_open, <protocol>_error_<kind>, <protocol>_request and
    <protocol>_response.  Within each index, handlers are kept sorted by
    their own ordering (``<`` comparison).
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every role its method names advertise.

        Raises TypeError if the object has no add_parent attribute.  A
        handler with no recognized method names is not registered at all.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        # Roles whose table is keyed directly by the protocol name.
        keyed_by_protocol = {
            "open": self.handle_open,
            "response": self.process_response,
            "request": self.process_request,
        }

        registered = False
        for name in dir(handler):
            if name in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            # Split at the first underscore using raw find() results, so a
            # name without an underscore is sliced exactly as find() == -1
            # dictates.
            cut = name.find("_")
            protocol = name[:cut]
            condition = name[cut+1:]

            if condition.startswith("error"):
                # Whatever follows "error_" is the kind; numeric kinds
                # (HTTP status codes) are stored as ints.
                kind_start = condition.find("_") + cut + 1
                kind = name[kind_start+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                table = self.handle_error.setdefault(protocol, {})
            elif condition in keyed_by_protocol:
                kind = protocol
                table = keyed_by_protocol[condition]
            else:
                continue

            bucket = table.setdefault(kind, [])
            if bucket:
                bisect.insort(bucket, handler)
            else:
                bucket.append(handler)
            registered = True

        if registered:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in chain[kind] until one answers.

        Handlers raise an exception if no one else should try to handle
        the request, or return None if they can't but another handler
        could.  Otherwise, they return the response.
        """
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a request object).

        Runs the <protocol>_request processors, opens the request, then
        runs the <protocol>_response processors and returns the result.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        request_hook = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the default, protocol-specific and unknown chains in turn."""
        found = self._call_chain(self.handle_open, 'default',
                                 'default_open', req)
        if found:
            return found

        scheme = req.type
        found = self._call_chain(self.handle_open, scheme,
                                 scheme + '_open', req)
        if found:
            return found

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching handler chain.

        For 'http' and 'https' the chain is chosen by args[2] (the status
        code position used by callers in this file), falling back to the
        http_error_default chain; other protocols use <proto>_error.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
        else:
            table = self.handle_error
            meth_name = proto + '_error'

        handled = self._call_chain(table, proto, meth_name, *args)
        if handled:
            return handled

        if is_http:
            return self._call_chain(table, 'default',
                                    'http_error_default', *args)
519
520# XXX probably also want an abstract factory that knows when it makes
521# sense to skip a superclass in favor of a subclass and when it might
522# make sense to include both
523
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    Each argument may be a handler instance or a handler class (which is
    instantiated with no arguments).  The opener also receives the default
    handlers, including HTTPSHandler when http.client provides
    HTTPSConnection.

    A default handler class is left out when any argument is a subclass,
    or an instance of a subclass, of that class.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(candidate, default):
        # True when *candidate* (class or instance) stands in for *default*.
        if isclass(candidate):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Drop every default that one of the caller's handlers overrides.
    default_classes = [
        klass for klass in default_classes
        if not any(replaces(check, klass) for check in handlers)
    ]

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        opener.add_handler(h() if isclass(h) else h)
    return opener
560 return opener
561
class BaseHandler:
    """Base class for handlers: parent bookkeeping and sort order."""

    # Position used when handlers are sorted; lower values sort first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* on the ``parent`` attribute."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
579
580
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return 2xx responses unchanged; route all others to parent.error()."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
597
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback error handler that raises HTTPError for any response."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError built from the request URL and response."""
        failing_url = req.full_url
        raise HTTPError(failing_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000601
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        replayable = (code in (301, 302, 303, 307)
                      and method in ("GET", "HEAD"))
        redirected_post = code in (301, 302, 303) and method == "POST"
        if not (replayable or redirected_post):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no body, so drop body-describing headers.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {name: value for name, value in req.headers.items()
                      if name.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when there is no such header or redirect_request()
        declines.  Raises HTTPError for a disallowed target scheme or
        when the redirect limits are exceeded.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = req.redirect_dict
            new.redirect_dict = visited
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            # First redirect in this chain: share one fresh counter dict.
            visited = {}
            new.redirect_dict = visited
            req.redirect_dict = visited
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
704
705
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        path_start = remainder.find("/", 2)
        if path_start == -1:
            authority = remainder[2:]
        else:
            authority = remainder[2:path_start]
    else:
        # authority
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
777
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per URL scheme."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one <scheme>_open method per entry of *proxies*.

        proxies maps a scheme to a proxy URL; when omitted, the result of
        getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Defaults freeze the per-scheme values at definition time so
            # every generated method keeps its own proxy and scheme.
            opener_method = (
                lambda r, proxy=url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener_method)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy* and hand it on.

        Returns None to let other handlers proceed, or the response from
        re-opening the request when the proxy uses a different scheme.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000819
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and reduced URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, password)},
    where a reduced URI is the (authority, path) pair produced by
    reduce_uri().
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm at uri (one URI or a sequence)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        # Store both the form with the default port filled in and the form
        # without it, so lookups succeed whichever way the URI is spelled.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for authuri in realm, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a character-wise common prefix would
        # wrongly treat "/foobar" as living below "/foo".
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
882
883
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm ``None``.

    Credentials registered under realm None act as a default whenever the
    requested realm yields no user.
    """

    def find_user_password(self, realm, authuri):
        """Look up realm first, then the default (None) realm."""
        found_user, found_password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if found_user is None:
            # Nothing for this realm: consult the catch-all entry.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return found_user, found_password
892
893
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to retry).
    """

    # XXX this allows for multiple auth-schemes, but will only look at
    # the first one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or on a comma.
    # An unanchored "(?:.*,)*" prefix backtracks exponentially on headers
    # made of many commas, letting a server stall the client.
    rx = re.compile('(?:^|,)'    # start of the string or ','
                    '[ \t]*'     # optional whitespace
                    '([^ \t]+)'  # scheme like "Basic"
                    '[ \t]+'     # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Forget previous attempts so the next challenge starts fresh."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq is the name of the challenge header to read from headers.
        Returns the retried response, or None when no retry was made.
        Raises HTTPError after too many attempts and ValueError when the
        challenge uses a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send req with credentials for realm; None if nothing to try."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # The same credentials were already sent and rejected.
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
961
962
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from the origin server with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req against its full URL, then reset the retry counter."""
        result = self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
        self.reset_retry_count()
        return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000973
974
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from a proxy with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req using its host as the lookup key, then reset retries."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        result = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000989
990
# Return n random bytes.  Kept as a module-level alias of os.urandom; the
# digest handler's get_cnonce() calls it through this name.
_randombytes = os.urandom
993
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000994
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Concrete handlers provide ``auth_header`` (the request header that
    carries the credentials) and ``parent`` (the opener used to retry).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget previous attempts so the next challenge starts fresh."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req when the auth_header challenge asks for Digest.

        Returns the retried response, or None when nothing was retried
        (no challenge, or a Basic challenge left to another handler).
        Raises HTTPError after too many attempts and ValueError for a
        scheme that is neither Digest nor Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send req with a Digest header computed from the challenge."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            # The same credentials were already sent and rejected.
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce for the given server nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for req from challenge chal.

        Returns None when the challenge lacks realm/nonce, names an
        unsupported algorithm, or no user is known for the realm.  Raises
        URLError when the server offers no supported qop.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer several qop values (e.g.
            # "auth,auth-int"); 'auth' is the one implemented here.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        Returns (None, None) for an algorithm other than 'MD5' or 'SHA';
        get_authorization() treats that as "cannot authenticate".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
1134
1135
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 from the origin server, then reset the retry count."""
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        self.reset_retry_count()
        return response
1152
1153
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from a proxy with Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 from the proxy, then reset the retry count."""
        response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return response
1165
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-like handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level applied to connections made by do_open()."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers a request needs before it is sent.

        Adds Content-type and Content-length for requests carrying data,
        a Host header, and the opener's default headers, without
        overriding headers that are already present.  Raises URLError when
        the request has no host, TypeError for str data, and ValueError
        for iterable data whose length cannot be determined.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # Not a buffer: the length of an arbitrary iterable
                    # cannot be computed, so the caller has to supply it.
                    from collections.abc import Iterable
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL; the Host header
            # must name the host taken from that URL.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Raises URLError when the request has no host or sending it fails
        with a socket error.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Honour the level given to __init__() / set_http_debuglevel().
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Do not leak the connection when sending the request or
            # reading the response fails.
            h.close()
            raise

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with the reason phrase, because urllib clients expect the
        # response to have the reason in .msg.  It would be good to mark
        # this attribute as deprecated and get them to use info() or
        # .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001268
1269
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs over a plain HTTPConnection."""

    # Requests are pre-processed by the shared header logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send req and return the response."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1276
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs over an HTTPSConnection."""

        # Requests are pre-processed by the shared header logic.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send req, passing the stored context and check_hostname."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    # Only advertised when the handler could actually be defined.
    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001293
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        # Imported here rather than at module level.
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header, then pass the request on."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response, then pass it on."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # The same processing applies to https.
    https_request = http_request
    https_response = http_response
1311
class UnknownHandler(BaseHandler):
    """Handler that rejects every request with a URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001316
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Values wrapped in double quotes have the quotes removed.  An empty
    value ("key=") is kept as an empty string.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexing so an empty value cannot raise
        # IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1326
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits s on commas that are outside double-quoted strings.  Quotes
    are kept in the returned elements; inside a quoted string a backslash
    is dropped and the character after it is taken literally.  Single
    quotes have no special meaning.  Every element is stripped of
    surrounding whitespace.
    """
    items = []
    current = []
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character following a backslash inside quotes: take as is.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # Separator outside quotes ends the element, even an empty one.
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1369
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open the local file named by req, rejecting foreign hosts."""
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison against the tuple
            # returned by get_names() would reject every host.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the cached tuple of addresses that count as local."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file, with synthesized headers.

        Raises URLError when the file cannot be accessed or when the
        request names a host that is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # port is only consulted when host is set (see the line above).
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001421
1422def _safe_gethostbyname(host):
1423 try:
1424 return socket.gethostbyname(host)
1425 except socket.gaierror:
1426 return None
1427
class FTPHandler(BaseHandler):
    """Open ftp:// URLs, using a fresh connection for every request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by req.

        Raises URLError when no host is given, the host cannot be
        resolved, or the FTP exchange fails.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, file = segments[:-1], segments[-1]
        if dirs and not dirs[0]:
            # Drop the empty segment produced by the leading '/'.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # A file name selects type 'I', none selects type 'D'; a
            # ";type=" attribute overrides either.
            type = 'I' if file else 'D'
            for attr in attrs:
                attr_name, attr_value = splitvalue(attr)
                if attr_name.lower() == 'type' and \
                   attr_value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = attr_value.upper()
            fp, retrlen = fw.retrfile(file, type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as msg:
            exc = URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001485
class CacheFTPHandler(FTPHandler):
    """FTPHandler that reuses connections for a limited time.

    self.cache maps a connection key to its ftpwrapper and self.timeout
    maps the same key to the time at which the entry expires.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how long (in seconds) a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry gets evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for the endpoint, creating it if new."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and evict one entry when full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # min() of an empty sequence raises ValueError; 0 matches the
        # initial value and forces a sweep on the next call.
        self.soonest = min(self.timeout.values()) if self.timeout else 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection instead of merely
                    # dropping the reference to it.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values()) if self.timeout else 0

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1538
1539
# Code moved from the old urllib module

# NOTE(review): the code that trims the cache is outside this part of the
# file; presumably it applies to the module-level ftpcache -- confirm.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
1543
# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        # Outside Windows this only undoes percent-encoding.
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        # Outside Windows this only applies percent-encoding.
        return quote(pathname)
else:
    # Windows paths are converted by the dedicated module.
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001557
1558# This really consists of two pieces:
1559# (1) a class which handles opening of all sorts of URLs
1560# (plus assorted utilities etc.)
1561# (2) a set of functions for parsing URLs
1562# XXX Should these be separated out into different modules?
1563
1564
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, optional x509 key/cert files and the caches.

        proxies must be a mapping of URL scheme to proxy URL; when omitted
        the result of getproxies() is used.  The keyword arguments
        'key_file' and 'cert_file' are remembered for HTTPS connections.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release the resources held by this opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Remove the temporary files created by retrieve() and empty
        the temp cache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # open_local_file() is an internal helper, not a scheme handler:
        # a "local_file:" (or "local-file:") URL must not reach it and
        # thereby skip the checks performed by open_file().
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            # Already meaningful errors; do not relabel them as socket
            # errors where socket.error is a base class of URLError.
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not available as a local file; fall through and fetch
                # it through the generic open() machinery instead.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection using the configured key/cert."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # URLError accepts (reason, filename); passing errno as well
            # would fail with TypeError and hide the real problem.
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # localhost() returns a single address string while thishost()
        # returns a tuple of addresses, so wrap the former before joining.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            # Iterate over a snapshot of the keys: entries are deleted
            # inside the loop, which is not allowed on a live dict view.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002010
2011
class FancyURLopener(URLopener):
    """URLopener variant that follows redirects and can retry requests
    with Basic authentication instead of failing on those responses."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Credentials already obtained, keyed by "realm@host".
        self.auth_cache = {}
        # Redirect counter and its upper bound (see http_error_302()).
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Hand the error response back to the caller rather than raising."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle 302 (temporary relocation) by following the redirect."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report it as a 500 through
            # a dedicated handler when one exists.
            meth = getattr(self, "http_error_500", self.http_error_default)
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        outcome = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                         data)
        self.tries = 0
        return outcome

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a 'location' nor a 'uri' header is
        present; raises HTTPError when the target scheme is not allowed.
        """
        for field in ('location', 'uri'):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return
        fp.close()

        # The server may have sent a relative URL; resolve it against
        # the URL that was originally requested.
        newurl = urljoin(self.type + ":" + url, newurl)

        parsed = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if parsed.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle 301 (permanent relocation) exactly like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle 303 exactly like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle 307: follow the redirect for requests without data,
        otherwise fall back to the default error handling."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Handle 401 (authentication required).

        Only the Basic scheme is supported; anything else, or retry being
        false, is passed to URLopener.http_error_default()."""
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['www-authenticate']
        found = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not found:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = found.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Handle 407 (proxy authentication required).

        Only the Basic scheme is supported; anything else, or retry being
        false, is passed to URLopener.http_error_default()."""
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['proxy-authenticate']
        found = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not found:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = found.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, netloc = splittype(proxy)
        netloc, proxyselector = splithost(netloc)
        # Drop any user info already present; a non-zero offset also
        # tells get_user_passwd() to discard cached credentials.
        at = netloc.find('@') + 1
        netloc = netloc[at:]
        user, passwd = self.get_user_passwd(netloc, realm, at)
        if not user and not passwd:
            return None
        netloc = "%s:%s@%s" % (quote(user, safe=''),
                               quote(passwd, safe=''), netloc)
        self.proxies['http'] = 'http://' + netloc + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, netloc = splittype(proxy)
        netloc, proxyselector = splithost(netloc)
        # Drop any user info already present; a non-zero offset also
        # tells get_user_passwd() to discard cached credentials.
        at = netloc.find('@') + 1
        netloc = netloc[at:]
        user, passwd = self.get_user_passwd(netloc, realm, at)
        if not user and not passwd:
            return None
        netloc = "%s:%s@%s" % (quote(user, safe=''),
                               quote(passwd, safe=''), netloc)
        self.proxies['https'] = 'https://' + netloc + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials embedded in the URL."""
        host, selector = splithost(url)
        at = host.find('@') + 1
        host = host[at:]
        user, passwd = self.get_user_passwd(host, realm, at)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials embedded in the URL."""
        host, selector = splithost(url)
        at = host.find('@') + 1
        host = host[at:]
        user, passwd = self.get_user_passwd(host, realm, at)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host.

        Cached credentials are reused unless clear_cache is true, in which
        case the cached entry is dropped and the user is prompted again.
        """
        cache_key = realm + '@' + host.lower()
        if cache_key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[cache_key]
            del self.auth_cache[cache_key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[cache_key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Ask for credentials on the terminal.

        Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
2220
2221
2222# Utility functions
2223
_localhost = None
def localhost():
    """Return the IP address that the name 'localhost' resolves to.

    The lookup is done once; later calls return the cached value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2231
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The result is a tuple of address strings, computed once and cached in
    the module-level _thishost.  If the machine's own host name cannot be
    resolved, the addresses of 'localhost' are used instead.
    """
    global _thishost
    if _thishost is None:
        try:
            _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
        except socket.gaierror:
            # Some machines have a host name that does not resolve; fall
            # back to the loopback name rather than failing the caller.
            _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    return _thishost
2239
_ftperrors = None
def ftperrors():
    """Return the exceptions the FTP class may raise (ftplib.all_errors).

    ftplib is imported on first use and the result is cached.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2248
_noheaders = None
def noheaders():
    """Return an email Message object without any headers.

    The same object is created once and returned on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2256
2257
2258# Utility classes
2259
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    One instance owns one ftplib.FTP control connection.  retrfile() hands
    out file objects whose close hook is file_close(); ``refcount`` counts
    the ones handed out and not yet closed, and ``keepalive`` says whether
    the control connection should survive once that count drops to zero.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # File objects returned by retrfile() that have not been closed yet.
        self.refcount = 0
        # Cleared by close(); while true, file_close() never tears the
        # control connection down.
        self.keepalive = persistent
        self.init()

    def init(self):
        """Connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start a transfer and return ``(file_object, retrlen)``.

        A *type* of 'd' or 'D' asks for a directory listing; any other
        value is sent to the server as 'TYPE <type>' and *file* is fetched
        with RETR.  If no file is named, or RETR is refused with a reply
        starting with '550', a LIST is issued instead.

        *retrlen* is the second item returned by
        ftplib.FTP.ntransfercmd().  URLError is raised for any other
        permanent error on RETR, and when *file* cannot be entered with
        CWD before it is listed.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = True
        else:
            cmd = 'TYPE ' + type
            isdir = False
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Any failure here triggers one reconnect and one retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply is not fatal: fall through and try a
                # directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error', reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error', reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # Callers read through the file object; the socket object itself
        # is closed here.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Collect the final reply of a transfer in progress, if any.

        FTP errors raised while reading that reply are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for the file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the control connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2355
2356# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively and variables with an
    empty value are ignored.  When the same scheme is configured through
    several spellings (e.g. HTTP_PROXY and http_proxy), the all-lowercase
    variable wins.  If REQUEST_METHOD is set, i.e. the process looks like
    a CGI script, a non-lowercase HTTP_PROXY is ignored because it can be
    injected by a client-supplied "Proxy:" header (CVE-2016-1000110).
    """
    proxies = {}
    lowercase = {}
    for name, value in os.environ.items():
        if not value or name[-6:].lower() != '_proxy':
            continue
        scheme = name[:-6].lower()
        proxies[scheme] = value
        if name == name.lower():
            lowercase[scheme] = value
    # Forget whatever 'http' was collected so far; an all-lowercase
    # http_proxy, which a web server cannot set from a request header,
    # is restored by the update below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase spellings override any other spelling, so
    # the result no longer depends on environment iteration order.
    proxies.update(lowercase)
    return proxies
2372
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    *host* may carry a ':port' suffix; each entry is compared against the
    host both with and without it.  Comparison is case-insensitive.  An
    entry matches when it equals the host or is a suffix of it starting
    at a label boundary, so 'example.com' matches 'www.example.com' but
    not 'badexample.com'.  A leading dot on an entry is ignored.

    Returns True if the proxy should be bypassed, False otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return True
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        if hostonly == name or host == name:
            return True
        # Require a dot before the suffix so that unrelated domains which
        # merely end in the same characters do not match.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    # otherwise, don't bypass
    return False
2392
2393
Ronald Oussorene72e1612011-03-14 18:15:25 -04002394# This code tests an OSX specific data structure but is testable on all
2395# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }

    Entries of 'exceptions' that consist only of a dotted number with an
    optional '/prefix-length' are treated as IPv4 networks and compared
    with the resolved address of the host; every other entry is matched
    against *host* as an fnmatch pattern.  Numeric entries whose prefix
    length is larger than 32 are skipped.
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack up to four dotted components into one 32-bit integer,
        # padding missing trailing components with zeros.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host and proxy_settings['exclude_simple']:
        return True

    hostIP = None
    lookup_done = False

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        # Anchored, so that a host pattern which merely starts with digits
        # (e.g. '1e100.net') is not mistaken for a network.
        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?\Z", value)
        if m is None:
            if fnmatch(host, value):
                return True
            continue

        # Resolve the host lazily and at most once, even on failure.
        if not lookup_done:
            lookup_done = True
            try:
                hostIP = ip2num(socket.gethostbyname(hostonly))
            except socket.error:
                hostIP = None
        if hostIP is None:
            continue

        base = ip2num(m.group(1))
        prefix = m.group(2)
        if prefix is None:
            prefix_len = 8 * (m.group(1).count('.') + 1)
        else:
            prefix_len = int(prefix[1:])
        if prefix_len > 32:
            # Would give a negative shift count below; ignore the entry.
            continue
        shift = 32 - prefix_len

        if (hostIP >> shift) == (base >> shift):
            return True

    return False
2453 return False
2454
2455
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002456if sys.platform == 'darwin':
Ronald Oussoren84151202010-04-18 20:46:11 +00002457 from _scproxy import _get_proxy_settings, _get_proxies
2458
2459 def proxy_bypass_macosx_sysconf(host):
Ronald Oussoren84151202010-04-18 20:46:11 +00002460 proxy_settings = _get_proxy_settings()
Ronald Oussorene72e1612011-03-14 18:15:25 -04002461 return _proxy_bypass_macosx_sysconf(host, proxy_settings)
Ronald Oussoren84151202010-04-18 20:46:11 +00002462
2463 def getproxies_macosx_sysconf():
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002464 """Return a dictionary of scheme -> proxy server URL mappings.
2465
Ronald Oussoren84151202010-04-18 20:46:11 +00002466 This function uses the MacOSX framework SystemConfiguration
2467 to fetch the proxy information.
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002468 """
Ronald Oussoren84151202010-04-18 20:46:11 +00002469 return _get_proxies()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002470
Ronald Oussoren84151202010-04-18 20:46:11 +00002471
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002472
2473 def proxy_bypass(host):
2474 if getproxies_environment():
2475 return proxy_bypass_environment(host)
2476 else:
Ronald Oussoren84151202010-04-18 20:46:11 +00002477 return proxy_bypass_macosx_sysconf(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002478
2479 def getproxies():
Ronald Oussoren84151202010-04-18 20:46:11 +00002480 return getproxies_environment() or getproxies_macosx_sysconf()
2481
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002482
2483elif os.name == 'nt':
2484 def getproxies_registry():
2485 """Return a dictionary of scheme -> proxy server URL mappings.
2486
2487 Win32 uses the registry to store proxies.
2488
2489 """
2490 proxies = {}
2491 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002492 import winreg
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002493 except ImportError:
2494 # Std module, so should be around - but you never know!
2495 return proxies
2496 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002497 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002498 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002499 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002500 'ProxyEnable')[0]
2501 if proxyEnable:
2502 # Returned as Unicode but problems if not converted to ASCII
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002503 proxyServer = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002504 'ProxyServer')[0])
2505 if '=' in proxyServer:
2506 # Per-protocol settings
2507 for p in proxyServer.split(';'):
2508 protocol, address = p.split('=', 1)
2509 # See if address has a type:// prefix
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002510 if not re.match('^([^/:]+)://', address):
2511 address = '%s://%s' % (protocol, address)
2512 proxies[protocol] = address
2513 else:
2514 # Use one setting for all protocols
2515 if proxyServer[:5] == 'http:':
2516 proxies['http'] = proxyServer
2517 else:
2518 proxies['http'] = 'http://%s' % proxyServer
Senthil Kumaran04f31b82010-07-14 20:10:52 +00002519 proxies['https'] = 'https://%s' % proxyServer
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002520 proxies['ftp'] = 'ftp://%s' % proxyServer
2521 internetSettings.Close()
2522 except (WindowsError, ValueError, TypeError):
2523 # Either registry key not found etc, or the value in an
2524 # unexpected format.
2525 # proxies already set up to be empty so nothing to do
2526 pass
2527 return proxies
2528
2529 def getproxies():
2530 """Return a dictionary of scheme -> proxy server URL mappings.
2531
2532 Returns settings gathered from the environment, if specified,
2533 or the registry.
2534
2535 """
2536 return getproxies_environment() or getproxies_registry()
2537
2538 def proxy_bypass_registry(host):
2539 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002540 import winreg
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002541 except ImportError:
2542 # Std modules, so should be around - but you never know!
2543 return 0
2544 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002545 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002546 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002547 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002548 'ProxyEnable')[0]
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002549 proxyOverride = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002550 'ProxyOverride')[0])
2551 # ^^^^ Returned as Unicode but problems if not converted to ASCII
2552 except WindowsError:
2553 return 0
2554 if not proxyEnable or not proxyOverride:
2555 return 0
2556 # try to make a host list from name and IP address.
Georg Brandl13e89462008-07-01 19:56:00 +00002557 rawHost, port = splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002558 host = [rawHost]
2559 try:
2560 addr = socket.gethostbyname(rawHost)
2561 if addr != rawHost:
2562 host.append(addr)
2563 except socket.error:
2564 pass
2565 try:
2566 fqdn = socket.getfqdn(rawHost)
2567 if fqdn != rawHost:
2568 host.append(fqdn)
2569 except socket.error:
2570 pass
2571 # make a check value list from the registry entry: replace the
2572 # '<local>' string by the localhost entry and the corresponding
2573 # canonical entry.
2574 proxyOverride = proxyOverride.split(';')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002575 # now check if we match one of the registry values.
2576 for test in proxyOverride:
Senthil Kumaran49476062009-05-01 06:00:23 +00002577 if test == '<local>':
2578 if '.' not in rawHost:
2579 return 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002580 test = test.replace(".", r"\.") # mask dots
2581 test = test.replace("*", r".*") # change glob sequence
2582 test = test.replace("?", r".") # change glob char
2583 for val in host:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002584 if re.match(test, val, re.I):
2585 return 1
2586 return 0
2587
2588 def proxy_bypass(host):
2589 """Return a dictionary of scheme -> proxy server URL mappings.
2590
2591 Returns settings gathered from the environment, if specified,
2592 or the registry.
2593
2594 """
2595 if getproxies_environment():
2596 return proxy_bypass_environment(host)
2597 else:
2598 return proxy_bypass_registry(host)
2599
2600else:
2601 # By default use environment variables
2602 getproxies = getproxies_environment
2603 proxy_bypass = proxy_bypass_environment