blob: ef4bf7fb63b878677d7d7d6e2170a6f7331fca0f [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib.  Pass the URL and, optionally, data to POST to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't recognize the hash
# algorithm requested in the challenge, it would be good to pass that
# information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100106 splitattr, splitquery, splitvalue, splittag, to_bytes,
107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# Probe for the optional ssl module once at import time; the flag is
# consulted before any SSL context is built.
try:
    import ssl
    _have_ssl = True
except ImportError:
    _have_ssl = False
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000117
# Names exported by "from urllib.request import *", grouped by role.
__all__ = [
    # Classes
    'Request',
    'OpenerDirector',
    'BaseHandler',
    'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler',
    'HTTPCookieProcessor',
    'ProxyHandler',
    'HTTPPasswordMgr',
    'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler',
    'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler',
    'ProxyDigestAuthHandler',
    'HTTPHandler',
    'FileHandler',
    'FTPHandler',
    'CacheFTPHandler',
    'DataHandler',
    'UnknownHandler',
    'HTTPErrorProcessor',
    # Functions
    'urlopen',
    'install_opener',
    'build_opener',
    'pathname2url',
    'url2pathname',
    'getproxies',
    # Legacy interface
    'urlretrieve',
    'urlcleanup',
    'URLopener',
    'FancyURLopener',
]
133
# Used in the User-Agent header sent with each request.
# Built from sys.version_info rather than by slicing sys.version: the
# slice keeps only three characters, so "3.10.4" would become "3.1".
__version__ = '%d.%d' % sys.version_info[:2]
136
# Module-wide default opener; created lazily by urlopen() or set
# explicitly through install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the chosen opener's open() returns.

    *url*, *data* and *timeout* are passed straight through to
    opener.open().

    If any of *cafile*, *capath* or *cadefault* is true, a one-off opener
    is built around an HTTPSHandler whose SSL context requires a valid
    certificate and checks the host name.  Certificates are loaded from
    *cafile*/*capath* when given, otherwise from the default verify
    paths.  This opener is not stored in the module-wide default.

    Otherwise the module-wide opener is used; it is created with
    build_opener() on first use.

    Raises ValueError if certificate options are given but the ssl module
    is not available.
    """
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        # This branch is only reached when verification was requested, so
        # verification and host name checking are always enabled here.
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000162
def install_opener(opener):
    """Store *opener* in the module-wide default that urlopen() falls back to."""
    global _opener
    _opener = opener
166
# Paths of temporary files created by urlretrieve(); urlcleanup() removes them.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Copy the resource behind *url* into a local file.

    *filename*, when given, is the destination path; otherwise a named
    temporary file is created and remembered in _url_tempfiles.  The one
    exception is a file: URL with no *filename*: nothing is copied and
    the normalized local path is returned directly.

    *reporthook*, when given, is called as
    reporthook(block_number, block_size, total_size) once before the
    first read and once after every block written.  total_size is -1
    when the response has no Content-Length header.

    *data* is passed on to urlopen().

    Returns a (path, headers) tuple.  Raises ContentTooShortError if
    fewer bytes arrive than Content-Length announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # A local file with no explicit destination needs no copy.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file.
        if not filename:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)
        else:
            tfp = open(filename, 'wb')

        with tfp:
            result = filename, headers
            chunk_size = 1024*8
            received = 0
            count = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])
            else:
                expected = -1

            if reporthook:
                reporthook(count, chunk_size, expected)

            chunk = fp.read(chunk_size)
            while chunk:
                received += len(chunk)
                tfp.write(chunk)
                count += 1
                if reporthook:
                    reporthook(count, chunk_size, expected)
                chunk = fp.read(chunk_size)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000230
def urlcleanup():
    """Remove the temporary files recorded by urlretrieve() and reset the
    module-wide opener."""
    global _opener
    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # Best effort: a file that is already gone or cannot be
            # removed must not stop the remaining cleanup.
            continue

    _url_tempfiles[:] = []
    if _opener:
        _opener = None
242
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is the network-location part of request.full_url; when
    that is empty, the request's "Host" header is used instead.  A
    trailing ":port" is stripped.

    Variation from the RFC: the result is lowercased, for convenient
    comparison.
    """
    netloc = urlparse(request.full_url)[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # Drop the port, if present, then normalize case.
    return _cut_port_re.sub("", netloc, count=1).lower()
260
class Request:
    """Encapsulate the state of a single URL request.

    Attributes set by the constructor:
      full_url          -- the URL without its fragment
      fragment          -- the part after '#', as returned by splittag()
      type, host, selector -- scheme, host and remainder parsed from full_url
      headers           -- headers that are copied to redirected requests
      unredirected_hdrs -- headers that are not copied on redirect
      data              -- request body (property; see below)
      origin_req_host, unverifiable, method -- stored as passed in
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the shared default for *headers* is only iterated, never
        # mutated, so the mutable default is harmless here.
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    @property
    def data(self):
        """The request body, or None."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # Go through the setter so that a Content-length header computed
        # for the old body is dropped as well.
        self.data = None

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError if the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached when there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # stacklevel=2 attributes each warning to the code calling the
    # deprecated method rather than to this module.

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Route the request through the proxy at *host*.

        For an https request with no tunnel host yet, the original host
        is remembered in _tunnel_host; otherwise the request type becomes
        *type* and the selector becomes the full URL.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set in either header dict.

        The lookup is exact; names are stored in str.capitalize() form.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring regular over unredirected
        headers, or *default* if it is set in neither."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header dicts, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs; regular
        headers win over unredirected ones of the same name."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
404
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are indexed by the names of their methods, which follow the
    pattern <protocol>_<condition>:
      <protocol>_open         -> handle_open[protocol]
      <protocol>_request      -> process_request[protocol]
      <protocol>_response     -> process_response[protocol]
      <protocol>_error_<kind> -> handle_error[protocol][kind]
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every protocol/condition it implements.

        The handler is added to self.handlers and told about its parent
        only if at least one of its attributes matched the naming pattern.

        Raises TypeError if *handler* has no add_parent attribute.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            if i < 0:
                # No "<protocol>_" prefix.  Without this guard find()'s -1
                # would slice a name such as "open" into protocol "ope"
                # and condition "open" and register it by accident.
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    # Non-numeric kinds are kept as strings.
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted using the handlers' own ordering.
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in chain[kind] until one
        returns something other than None; return that, else None."""
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request object).

        Runs the <protocol>_request processors, opens the request, then
        runs the <protocol>_response processors and returns the result.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains,
        in that order, returning the first truthy result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching handler chain.

        For 'http' and 'https', args[2] is taken as the status code and
        the http_error_<code> chain is tried, followed by
        http_error_default.  For any other protocol the <proto>_error
        chain is tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # https is not different than http
            chain = self.handle_error['http']
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (chain, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (chain, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
542
543# XXX probably also want an abstract factory that knows when it makes
544# sense to skip a superclass in favor of a subclass and when it might
545# make sense to include both
546
def build_opener(*handlers):
    """Return an OpenerDirector populated with default and custom handlers.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.  A default handler class is left
    out whenever one of the arguments is an instance or subclass of it.
    HTTPSHandler is among the defaults only when http.client provides
    HTTPSConnection.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def overrides(candidate, default):
        # True when *candidate* (class or instance) stands in for *default*.
        if isclass(candidate):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # A list (not a generator) is passed to any() so that every supplied
    # handler is checked against every default class.
    kept = [klass for klass in default_classes
            if not any([overrides(check, klass) for check in handlers])]

    for klass in kept:
        opener.add_handler(klass())

    for supplied in handlers:
        if isclass(supplied):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
585
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Sort key used by __lt__; lower values sort first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler was registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order."""
        try:
            their_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < their_order
603
604
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* for 2xx codes; otherwise hand it to
        self.parent.error() and return that result."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
621
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default turns the error into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # Always raises; the exception carries the response pieces.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000625
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no body, so body-describing headers
        # are not copied over.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError when the target
        scheme is not http, https or ftp, or when a redirect loop is
        detected.  Otherwise returns the result of opening the new
        request through self.parent.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # Only an absolute reference with an empty path is malformed.  A
        # relative reference such as "?page=2" legitimately has no path
        # and must keep the base URL's path when joined below.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
728
729
def _parse_proxy(proxy):
    """Split a proxy specification into (scheme, user, password, host/port).

    *proxy* is either a full URL or a bare authority.  A URL must have an
    authority (host:port) component, which per RFC 3986 means two slashes
    after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    Any of the first three items of the result may be None.

    Bare authorities:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    Userinfo, when present, is taken to be username:password:

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    The same, written as URLs:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Anything after the authority is dropped:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    A URL without a trailing '/':

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL form
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs the path
        # is empty or starts with '/'; the authority ends at that slash.
        slash = remainder.find("/", 2)
        if slash == -1:
            authority = remainder[2:]
        else:
            authority = remainder[2:slash]
    else:
        # Bare authority: whatever splittype() found is not a scheme.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
801
class ProxyHandler(BaseHandler):
    """Send requests through the proxy configured for their URL scheme."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Create one ``<scheme>_open`` method per entry of *proxies*.

        proxies maps a URL scheme to a proxy URL; when it is None the
        mapping returned by getproxies() is used instead.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # The per-scheme values are bound as lambda defaults so each
            # generated method keeps its own proxy rather than the value
            # left over from the last loop iteration.
            opener = (lambda r, proxy=proxy_url, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; reopen it if the scheme had to change.

        Returns None when the request should simply continue down the
        handler chain (host is bypassed, or the scheme is unchanged or
        https), otherwise the result of reopening the request through
        the parent opener.
        """
        request_scheme = req.type
        proxy_scheme, user, password, hostport = _parse_proxy(proxy)
        if proxy_scheme is None:
            # A bare "host:port" proxy inherits the request's own scheme.
            proxy_scheme = request_scheme

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            plain = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(plain.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(hostport), proxy_scheme)

        if request_scheme != proxy_scheme and request_scheme != 'https':
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
        # let other handlers take care of it
        return None
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000843
class HTTPPasswordMgr:
    """Keep (user, password) pairs keyed by realm and by URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* under one or more URIs.

        uri may be a single string or a sequence of strings.  Each entry
        is stored twice: once with the scheme's default port made
        explicit and once exactly as given, so lookups succeed either way.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        realm_entries = self.passwd.setdefault(realm, {})
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            realm_entries[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        Returns (None, None) when no registered URI equals or is a
        parent of *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the well-known port explicit so "http://host/" and
            # "http://host:80/" reduce to the same authority.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain common-prefix check would
        # treat "/foobar" as lying below "/foo" and hand credentials to a
        # sibling path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
906
907
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm*, then in the None realm.

        The None-realm lookup only happens when the realm-specific
        lookup produced no user.
        """
        credentials = HTTPPasswordMgr.find_user_password(self, realm,
                                                         authuri)
        user, password = credentials
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
916
917
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` (the opener used
    to resend the request).
    """

    # XXX this allows for multiple auth-schemes; the first Basic
    # challenge that carries a realm is the one that is answered.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or a ','.  An
    # unanchored "(?:.*,)*" prefix backtracks exponentially on headers
    # made of many commas, which lets a hostile server stall the client.
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for credential lookup (default: HTTPPasswordMgr)."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in header *authreq* of *headers*.

        Returns the response obtained by resending *req* with
        credentials, or None when there is no usable Basic challenge or
        no stored password.  Raises HTTPError after more than five
        attempts and ValueError when the header's first scheme is not
        Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if not authreq:
            return None
        words = authreq.split()
        if not words:
            # Header consisted of whitespace only: nothing to answer.
            return None
        scheme = words[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
            scheme, quote, realm = mo.groups()
            if scheme.lower() != 'basic':
                # Challenge for another scheme that happens to carry a
                # realm; keep looking for a Basic one.
                continue
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 2)
            response = self.retry_http_basic_auth(host, req, realm)
            if response and response.code != 401:
                self.retried = 0
            return response
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Resend *req* with Basic credentials for *realm*, if any are known.

        Returns None when no password is stored, or when the request
        already carried exactly these credentials (resending them would
        only fail again).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
985
986
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials in ``Authorization``."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up under its full URL."""
        retry_response = self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
        # Each challenge handled here starts again with a fresh retry budget.
        self.reset_retry_count()
        return retry_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000997
998
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials in ``Proxy-authorization``."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up under ``req.host``."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        retry_response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return retry_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001013
1014
# Return n random bytes.  Alias for os.urandom; get_cnonce() of
# AbstractDigestAuthHandler calls it to salt the client nonce.
_randombytes = os.urandom
1017
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001018
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` (the opener used
    to resend the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* for credential lookup (default: HTTPPasswordMgr)."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in header *auth_header* of *headers*.

        Returns the retried response for a Digest challenge and None for
        a missing header or a Basic challenge.  Raises HTTPError after
        more than five attempts and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Resend *req* with a Digest header computed from challenge *auth*.

        Returns None when no authorization could be computed or when the
        request already carried exactly this header value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for *req* from challenge *chal*.

        chal is the parsed challenge as a dict.  Returns None when the
        challenge lacks realm or nonce, names an unsupported algorithm,
        or no user is known for the realm.  Raises URLError when the
        server offers a qop list that does not include "auth".
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The challenge's qop is a comma-separated list of options
            # (e.g. "auth,auth-int"); "auth" is the one implemented here,
            # so that literal value goes into the digest and the header.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) digest functions for *algorithm*.

        H hashes a string to a hex digest and KD(secret, data) hashes
        "secret:data".  Returns (None, None) for an algorithm other than
        'MD5' or 'SHA', which get_authorization() treats as "cannot
        answer this challenge".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1158
1159
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* in answer to a ``www-authenticate`` challenge."""
        # Element 1 of the parsed URL is its network location.
        netloc = urlparse(req.full_url)[1]
        result = self.http_error_auth_reqed('www-authenticate',
                                            netloc, req, headers)
        self.reset_retry_count()
        return result
1176
1177
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest credentials in ``Proxy-Authorization``."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* in answer to a ``proxy-authenticate`` challenge."""
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            req.host, req, headers)
        self.reset_retry_count()
        return result
1189
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and connection handling shared by HTTP(S) handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers *request* needs before it is sent.

        Adds Content-type and Content-length for requests that carry
        data, a Host header, and the opener's default headers, each only
        if not already present.  Returns the same request object.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        # Imported here so the block does not rely on the ABC aliases in
        # the ``collections`` namespace, which newer Pythons removed.
        from collections.abc import Iterable

        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # No buffer interface: the size is unknown up front.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # When going through a proxy the selector is a full URL, so
            # the Host header must name the host inside that URL.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Extra keyword arguments are passed to the http_class constructor.
        Raises URLError when the request has no host or sending it fails
        with a socket error.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level configured through __init__/set_http_debuglevel.
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.selector, req.data, headers)
        except socket.error as err:  # timeout error
            h.close()
            raise URLError(err)
        try:
            r = h.getresponse()
        except BaseException:
            # Do not leave the connection open when reading the response
            # fails; the original exception is re-raised unchanged.
            h.close()
            raise
        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # urllib clients expect the response to have the reason in .msg,
        # so the HTTPResponse's own .msg is overwritten with it.  It would
        # be good to mark this attribute as deprecated and get them to
        # use info() or .headers instead.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001298
1299
class HTTPHandler(AbstractHTTPHandler):
    """Open ``http`` requests with http.client.HTTPConnection."""

    # The shared request preparation is exposed under the http_request name.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1306
# HTTPS support is only defined (and exported) when http.client provides
# an HTTPSConnection class.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open ``https`` requests with http.client.HTTPSConnection."""

        # The shared request preparation is exposed under https_request.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Remember *context* and *check_hostname* for every connection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001323
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header, then pass *request* on."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar record cookies from *response*, then pass it on."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
1341
class UnknownHandler(BaseHandler):
    """Reject requests whose URL scheme has no handler."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's scheme."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001346
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    surrounding double quotes removed from quoted values.  An element
    without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexing, so an empty value ("key=") is
        # stored as '' instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1356
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits *s* on commas, except commas inside double-quoted strings.
    Inside a quoted string a backslash is dropped and the character after
    it is kept literally.  A quote may start in the middle of an element,
    and single quotes have no special meaning.  Each element is returned
    stripped of surrounding whitespace; an empty trailing element is
    omitted.
    """
    items = []
    buf = []
    escaped = False
    in_quotes = False

    for ch in s:
        if escaped:
            # Character following a backslash: always taken literally.
            buf.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                buf.append(ch)
        elif ch == ',':
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quotes = True
            buf.append(ch)

    # append last part
    if buf:
        items.append(''.join(buf))

    return [item.strip() for item in items]
1399
class FileHandler(BaseHandler):
    """Open ``file:`` URLs that refer to files on the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* as a local file.

        When the selector starts with exactly two slashes and the request
        names a host other than 'localhost', that host must be one of the
        local names from get_names(); otherwise URLError is raised.  In
        that branch nothing is opened and None is returned.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: comparing the host string to the tuple of
            # names by identity can never succeed and rejected every host.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of this machine, computed once per process.

        The result is cached on the FileHandler class itself.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The response carries synthesized Content-type, Content-length and
        Last-modified headers.  Raises URLError when the file cannot be
        accessed or when the request's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Serve the file only for a missing host, or for a port-less
            # host that resolves to one of this machine's addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001451
1452def _safe_gethostbyname(host):
1453 try:
1454 return socket.gethostbyname(host)
1455 except socket.gaierror:
1456 return None
1457
class FTPHandler(BaseHandler):
    """Open ``ftp:`` URLs, using a new connection for every request."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they are known.  Any ftplib error, a missing
        host, or a host that fails to resolve is reported as URLError.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        passwd = None
        if user:
            user, passwd = splitpasswd(user)
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        # Last path segment is the file name, the rest are directories; a
        # leading empty segment (from the leading '/') is dropped.
        *dirs, filename = [unquote(segment) for segment in path.split('/')]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default transfer type: 'I' when a file is named, 'D' when the
            # path names only a directory; a ";type=" attribute overrides it.
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(filename, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            # Re-raise as URLError but keep the original traceback.
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001515
class CacheFTPHandler(FTPHandler):
    """FTP handler that reuses connections for a limited time."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # connection key -> ftpwrapper
        self.timeout = {}    # connection key -> expiry time (time.time() based)
        self.soonest = 0     # earliest expiry time currently in self.timeout
        self.delay = 60      # seconds added to "now" on every use
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached ftpwrapper for the target, creating it if needed.

        Every call pushes the entry's expiry time forward by self.delay
        and then prunes the cache.
        """
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            expired = [key for key, deadline in self.timeout.items()
                       if deadline < now]
            for key in expired:
                self.cache[key].close()
                del self.cache[key]
                del self.timeout[key]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            # NOTE(review): unlike the expiry pass above, the evicted
            # connection is dropped without close() -- confirm intended.
            not_found = object()
            victim = next((key for key, deadline in self.timeout.items()
                           if deadline == self.soonest), not_found)
            if victim is not not_found:
                del self.cache[victim]
                del self.timeout[victim]
            self.soonest = min(self.timeout.values())

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1568
class DataHandler(BaseHandler):
    """Handler for ``data:`` URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the payload embedded in a data URL into a response object.

        POSTed data is ignored.  URL syntax:

            dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            mediatype := [ type "/" subtype ] *( ";" parameter )
            data      := *urlchar
            parameter := attribute "=" value

        Returns an ``addinfourl`` wrapping the decoded bytes, with
        Content-type and Content-length headers.
        """
        full_url = req.full_url

        _scheme, remainder = full_url.split(":", 1)
        media, encoded = remainder.split(",", 1)

        # Percent-decoding is applied unconditionally because even base64
        # payloads may arrive quoted.
        payload = unquote_to_bytes(encoded)
        base64_marker = ";base64"
        if media.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            media = media[:-len(base64_marker)]

        # An absent media type falls back to this default.
        media = media or "text/plain;charset=US-ASCII"

        header_text = "Content-type: %s\nContent-length: %d\n" % (
            media, len(payload))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, full_url)
1598
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001599
# Code moved from the old urllib module
1601
1602MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
1603
1604# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert a relative 'file' URL into a file system path.

        OS-specific; not recommended for general use.  This variant only
        percent-decodes its argument.
        """
        local_path = unquote(pathname)
        return local_path

    def pathname2url(pathname):
        """Convert a file system path into a relative 'file' URL.

        OS-specific; not recommended for general use.  This variant only
        percent-encodes its argument.
        """
        url_path = quote(pathname)
        return url_path
else:
    # On Windows the conversions come from the nturl2path module.
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001617
1618# This really consists of two pieces:
1619# (1) a class which handles opening of all sorts of URLs
1620# (plus assorted utilities etc.)
1621# (2) a set of functions for parsing URLs
1622# XXX Should these be separated out into different modules?
1623
1624
1625ftpcache = {}
class URLopener:
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed).
    """

    # Class-level defaults so that cleanup() (reached from __del__) also
    # works on an instance whose __init__ did not run to completion.
    __tempfiles = None
    tempcache = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, certificate files, default headers and caches.

        proxies -- mapping of URL scheme to proxy URL; when None, the
            result of getproxies() is used.
        x509 -- optional 'key_file' and 'cert_file' entries, handed to
            HTTPS connections.

        A DeprecationWarning is issued on every instantiation.
        """
        msg = ("%(class)s style of invoking requests is deprecated. "
               "Use newer urlopen functions/methods"
               % {'class': self.__class__.__name__})
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release the resources held by this opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files made by retrieve(); clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: a file that cannot be removed is skipped.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named ``open_<scheme>`` (with '-' in the
        scheme replaced by '_'); a URL without a scheme is treated as
        'file'.  When a proxy is configured for the scheme, the proxy's
        scheme selects the method, which then receives a
        (proxy host, full URL) tuple instead of a string.

        Raises IOError('socket error', ...) for socket errors; HTTPError
        and URLError propagate unchanged.
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL via an unknown proxy type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        filename -- where to store the data; when omitted, a local file is
            used in place and anything else goes to a temporary file.
        reporthook -- optional callable invoked as
            reporthook(blocknum, blocksize, totalsize) once before the
            first read and once after every block; totalsize is -1 when
            there is no Content-Length header.
        data -- passed through to open().

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a local file; fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                # Keep the URL's file extension on the temporary file.
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.

        Returns an addinfourl for a 2xx response and the result of
        http_error() otherwise.  Raises IOError when no host can be
        determined and URLError on a bad status line.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers registered on the opener override the ones set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code.

        A true result from the specific handler is returned; otherwise
        http_error_default() decides.
        """
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection using this opener's key/cert files."""
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file.

        Raises URLError when reached through a proxy and ValueError when
        the URL names a host other than localhost.
        """
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl around the opened file with Content-Type,
        Content-Length and Last-modified headers.  Raises URLError when
        the file cannot be stat()ed or the host part is not this machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by
        (user, host address, port, directory path).  Raises URLError when
        reached through a proxy, when no host is given, or on FTP errors.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            # Iterate over a snapshot of the keys: deleting entries while
            # iterating the dict itself raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' without a file name, else 'I';
            # a ";type=" attribute in the URL overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        Raises URLError when reached through a proxy and IOError when the
        URL contains no comma.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without '=' names the encoding rather than a
        # media type parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002070
2071
2072class FancyURLopener(URLopener):
2073 """Derived class with handlers for errors we can handle (perhaps)."""
2074
2075 def __init__(self, *args, **kwargs):
2076 URLopener.__init__(self, *args, **kwargs)
2077 self.auth_cache = {}
2078 self.tries = 0
2079 self.maxtries = 10
2080
2081 def http_error_default(self, url, fp, errcode, errmsg, headers):
2082 """Default error handling -- don't raise an exception."""
Georg Brandl13e89462008-07-01 19:56:00 +00002083 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002084
2085 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2086 """Error 302 -- relocated (temporarily)."""
2087 self.tries += 1
2088 if self.maxtries and self.tries >= self.maxtries:
2089 if hasattr(self, "http_error_500"):
2090 meth = self.http_error_500
2091 else:
2092 meth = self.http_error_default
2093 self.tries = 0
2094 return meth(url, fp, 500,
2095 "Internal Server Error: Redirect Recursion", headers)
2096 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
2097 data)
2098 self.tries = 0
2099 return result
2100
2101 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2102 if 'location' in headers:
2103 newurl = headers['location']
2104 elif 'uri' in headers:
2105 newurl = headers['uri']
2106 else:
2107 return
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002108 fp.close()
guido@google.coma119df92011-03-29 11:41:02 -07002109
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002110 # In case the server sent a relative URL, join with original:
Georg Brandl13e89462008-07-01 19:56:00 +00002111 newurl = urljoin(self.type + ":" + url, newurl)
guido@google.coma119df92011-03-29 11:41:02 -07002112
2113 urlparts = urlparse(newurl)
2114
2115 # For security reasons, we don't allow redirection to anything other
2116 # than http, https and ftp.
2117
2118 # We are using newer HTTPError with older redirect_internal method
2119 # This older method will get deprecated in 3.3
2120
Senthil Kumaran6497aa32012-01-04 13:46:59 +08002121 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
guido@google.coma119df92011-03-29 11:41:02 -07002122 raise HTTPError(newurl, errcode,
2123 errmsg +
2124 " Redirection to url '%s' is not allowed." % newurl,
2125 headers, fp)
2126
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002127 return self.open(newurl)
2128
2129 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
2130 """Error 301 -- also relocated (permanently)."""
2131 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2132
2133 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
2134 """Error 303 -- also relocated (essentially identical to 302)."""
2135 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2136
2137 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2138 """Error 307 -- relocated, but turn POST into error."""
2139 if data is None:
2140 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2141 else:
2142 return self.http_error_default(url, fp, errcode, errmsg, headers)
2143
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002144 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2145 retry=False):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002146 """Error 401 -- authentication required.
2147 This function supports Basic authentication only."""
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002148 if 'www-authenticate' not in headers:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002149 URLopener.http_error_default(self, url, fp,
2150 errcode, errmsg, headers)
2151 stuff = headers['www-authenticate']
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002152 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2153 if not match:
2154 URLopener.http_error_default(self, url, fp,
2155 errcode, errmsg, headers)
2156 scheme, realm = match.groups()
2157 if scheme.lower() != 'basic':
2158 URLopener.http_error_default(self, url, fp,
2159 errcode, errmsg, headers)
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002160 if not retry:
2161 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2162 headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002163 name = 'retry_' + self.type + '_basic_auth'
2164 if data is None:
2165 return getattr(self,name)(url, realm)
2166 else:
2167 return getattr(self,name)(url, realm, data)
2168
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002169 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
2170 retry=False):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002171 """Error 407 -- proxy authentication required.
2172 This function supports Basic authentication only."""
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002173 if 'proxy-authenticate' not in headers:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002174 URLopener.http_error_default(self, url, fp,
2175 errcode, errmsg, headers)
2176 stuff = headers['proxy-authenticate']
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002177 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2178 if not match:
2179 URLopener.http_error_default(self, url, fp,
2180 errcode, errmsg, headers)
2181 scheme, realm = match.groups()
2182 if scheme.lower() != 'basic':
2183 URLopener.http_error_default(self, url, fp,
2184 errcode, errmsg, headers)
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002185 if not retry:
2186 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2187 headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002188 name = 'retry_proxy_' + self.type + '_basic_auth'
2189 if data is None:
2190 return getattr(self,name)(url, realm)
2191 else:
2192 return getattr(self,name)(url, realm, data)
2193
2194 def retry_proxy_http_basic_auth(self, url, realm, data=None):
Georg Brandl13e89462008-07-01 19:56:00 +00002195 host, selector = splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002196 newurl = 'http://' + host + selector
2197 proxy = self.proxies['http']
Georg Brandl13e89462008-07-01 19:56:00 +00002198 urltype, proxyhost = splittype(proxy)
2199 proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002200 i = proxyhost.find('@') + 1
2201 proxyhost = proxyhost[i:]
2202 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2203 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002204 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002205 quote(passwd, safe=''), proxyhost)
2206 self.proxies['http'] = 'http://' + proxyhost + proxyselector
2207 if data is None:
2208 return self.open(newurl)
2209 else:
2210 return self.open(newurl, data)
2211
2212 def retry_proxy_https_basic_auth(self, url, realm, data=None):
Georg Brandl13e89462008-07-01 19:56:00 +00002213 host, selector = splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002214 newurl = 'https://' + host + selector
2215 proxy = self.proxies['https']
Georg Brandl13e89462008-07-01 19:56:00 +00002216 urltype, proxyhost = splittype(proxy)
2217 proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002218 i = proxyhost.find('@') + 1
2219 proxyhost = proxyhost[i:]
2220 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2221 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002222 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002223 quote(passwd, safe=''), proxyhost)
2224 self.proxies['https'] = 'https://' + proxyhost + proxyselector
2225 if data is None:
2226 return self.open(newurl)
2227 else:
2228 return self.open(newurl, data)
2229
def retry_http_basic_auth(self, url, realm, data=None):
    """Retry an http request with credentials embedded in the URL.

    Any "user:password@" prefix already present in the host part of *url*
    is dropped, fresh credentials are requested from
    ``self.get_user_passwd()`` and, percent-quoted, placed in front of the
    host.  The resulting URL is opened with ``self.open()``.  Returns None
    when neither a user name nor a password is available.
    """
    host, selector = splithost(url)
    # Position just past any "user:password@" prefix (0 when there is none);
    # also passed on as clear_cache so stale cached credentials get dropped.
    cut = host.find('@') + 1
    host = host[cut:]
    user, passwd = self.get_user_passwd(host, realm, cut)
    if not user and not passwd:
        return None
    quoted_user = quote(user, safe='')
    quoted_passwd = quote(passwd, safe='')
    host = "%s:%s@%s" % (quoted_user, quoted_passwd, host)
    newurl = 'http://' + host + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2243
def retry_https_basic_auth(self, url, realm, data=None):
    """Retry an https request with credentials embedded in the URL.

    Any "user:password@" prefix already present in the host part of *url*
    is dropped, fresh credentials are requested from
    ``self.get_user_passwd()`` and, percent-quoted, placed in front of the
    host.  The resulting URL is opened with ``self.open()``.  Returns None
    when neither a user name nor a password is available.
    """
    host, selector = splithost(url)
    # Position just past any "user:password@" prefix (0 when there is none);
    # also passed on as clear_cache so stale cached credentials get dropped.
    cut = host.find('@') + 1
    host = host[cut:]
    user, passwd = self.get_user_passwd(host, realm, cut)
    if not user and not passwd:
        return None
    quoted_user = quote(user, safe='')
    quoted_passwd = quote(passwd, safe='')
    host = "%s:%s@%s" % (quoted_user, quoted_passwd, host)
    newurl = 'https://' + host + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2257
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return a (user, passwd) pair for *realm* at *host*.

    Credentials are cached in ``self.auth_cache`` under the key
    "<realm>@<lower-cased host>".  A cached pair is returned directly
    unless *clear_cache* is true, in which case the entry is dropped and
    ``self.prompt_user_passwd()`` is asked again.  A freshly obtained pair
    is cached only when at least one of its two values is true.
    """
    cache_key = realm + '@' + host.lower()
    if cache_key in self.auth_cache:
        if not clear_cache:
            return self.auth_cache[cache_key]
        del self.auth_cache[cache_key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[cache_key] = (user, passwd)
    return user, passwd
2268
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!

    Ask for a user name with input() and for a password with
    getpass.getpass(), and return them as a (user, passwd) pair.  If the
    prompt is interrupted with KeyboardInterrupt, print an empty line and
    return (None, None).
    """
    from getpass import getpass
    try:
        user = input("Enter username for %s at %s: " % (realm, host))
        passwd = getpass("Enter password for %s in %s at %s: " %
                         (user, realm, host))
    except KeyboardInterrupt:
        print()
        return None, None
    return user, passwd
2280
2281
2282# Utility functions
2283
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is resolved on the first call and stored in the
    module-level ``_localhost`` variable; later calls return that value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2291
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The addresses are looked up on the first call and stored as a tuple in
    the module-level ``_thishost`` variable; later calls return that tuple.
    """
    global _thishost
    if _thishost is None:
        try:
            _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
        except socket.gaierror:
            # The machine's own hostname may not be resolvable (for example
            # when it is missing from the hosts file); use the addresses
            # of 'localhost' rather than failing.
            _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    return _thishost
2299
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on the first call only; its ``all_errors`` value is
    stored in the module-level ``_ftperrors`` variable and reused afterwards.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
2308
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The message is created on the first call and stored in the module-level
    ``_noheaders`` variable, so every call returns the same object.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2316
2317
2318# Utility classes
2319
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and connect immediately.

        *dirs* is the sequence of directories to change into after login.
        *persistent* becomes ``self.keepalive``: while it is true, closing
        the last file object handed out by retrfile() does not close the
        FTP connection.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet closed.
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start a transfer and return a (file object, length) pair.

        *type* is appended to the FTP 'TYPE' command; 'd' or 'D' requests a
        directory listing in ASCII mode instead.  When *file* is set and no
        listing was requested, 'RETR' is tried first; if the server answers
        with a 550 error, or no file was given, a 'LIST' is issued.  The
        length is the second value returned by ftplib's ntransfercmd().

        Raises URLError for FTP permission errors other than that 550
        fallback, and when *file* cannot be entered as a directory.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The connection is unusable: reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    # 'reason' is an exception object, so it must be
                    # formatted with %s; %d would raise TypeError here.
                    raise URLError('ftp error: %s' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %s' % reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # The caller reads from the file object; the socket object itself
        # is not needed any more.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if it is unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile().

        Drops one reference and closes the FTP connection once no
        references remain and the wrapper is not being kept alive.
        """
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP errors while doing so."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2409
2410# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are ignored.
    """
    suffix = '_proxy'
    lowered = ((name.lower(), value) for name, value in os.environ.items())
    return {
        name[:-len(suffix)]: value
        for name, value in lowered
        if value and name.endswith(suffix)
    }
2426
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.  Returns 1 when the proxy should be bypassed, else 0.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # The host matches when it ends with any non-empty DNS suffix, either
    # with or without its port.
    suffixes = (entry.strip() for entry in no_proxy.split(','))
    if any(suffix and (hostonly.endswith(suffix) or host.endswith(suffix))
           for suffix in suffixes):
        return 1
    # otherwise, don't bypass
    return 0
2446
2447
Ronald Oussorene72e1612011-03-14 18:15:25 -04002448# This code tests an OSX specific data structure but is testable on all
2449# platforms
2450def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2451 """
2452 Return True iff this host shouldn't be accessed using a proxy
2453
2454 This function uses the MacOSX framework SystemConfiguration
2455 to fetch the proxy information.
2456
2457 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2458 { 'exclude_simple': bool,
2459 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2460 }
2461 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002462 from fnmatch import fnmatch
2463
2464 hostonly, port = splitport(host)
2465
2466 def ip2num(ipAddr):
2467 parts = ipAddr.split('.')
2468 parts = list(map(int, parts))
2469 if len(parts) != 4:
2470 parts = (parts + [0, 0, 0, 0])[:4]
2471 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2472
2473 # Check for simple host names:
2474 if '.' not in host:
2475 if proxy_settings['exclude_simple']:
2476 return True
2477
2478 hostIP = None
2479
2480 for value in proxy_settings.get('exceptions', ()):
2481 # Items in the list are strings like these: *.local, 169.254/16
2482 if not value: continue
2483
2484 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2485 if m is not None:
2486 if hostIP is None:
2487 try:
2488 hostIP = socket.gethostbyname(hostonly)
2489 hostIP = ip2num(hostIP)
2490 except socket.error:
2491 continue
2492
2493 base = ip2num(m.group(1))
2494 mask = m.group(2)
2495 if mask is None:
2496 mask = 8 * (m.group(1).count('.') + 1)
2497 else:
2498 mask = int(mask[1:])
2499 mask = 32 - mask
2500
2501 if (hostIP >> mask) == (base >> mask):
2502 return True
2503
2504 elif fnmatch(host, value):
2505 return True
2506
2507 return False
2508
2509
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Test host against the proxy settings returned by _scproxy."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return a true value if the proxy should be bypassed for host.

        Uses the environment settings if any proxy is configured there,
        otherwise the system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the system configuration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the registry handle even when a value is missing
                # or malformed.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        The host name, its resolved address and its fully qualified name
        are each tested against every ';'-separated override entry, where
        '*' and '?' act as wildcards.  The special entry '<local>' matches
        any host name without a dot.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
                # ^^^^ Returned as Unicode but problems if not converted to ASCII
            finally:
                # Release the registry handle on every path.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if not test:
                # An empty entry (e.g. from a trailing ';') would become an
                # empty pattern that matches every host; ignore it.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a true value if the proxy should be bypassed for host.

        Checks the settings gathered from the environment, if specified,
        or the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment