blob: d389fa9799dd7ea130f0295080a58f846d374a0a [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.

objects of interest:

OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
the Handler classes, while dealing with requests and responses.

Request -- An object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""
69
# XXX issues:
# If an authentication error handler tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows that the problem was, e.g., that it didn't know
# the hash algo that was requested in the challenge, it would be good to
# pass that information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Senthil Kumarand95cc752010-08-08 11:27:53 +0000106 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000107from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000108
109# check for SSL
110try:
111 import ssl
Senthil Kumaranc2958622010-11-22 04:48:26 +0000112except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 _have_ssl = False
114else:
115 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000116
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800117__all__ = [
118 # Classes
119 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
120 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
121 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
122 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
123 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
124 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
125 'UnknownHandler', 'HTTPErrorProcessor',
126 # Functions
127 'urlopen', 'install_opener', 'build_opener',
128 'pathname2url', 'url2pathname', 'getproxies',
129 # Legacy interface
130 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
131]
132
# used in User-Agent header sent
# Built from sys.version_info rather than by slicing sys.version: the slice
# keeps only three characters and so truncates two-digit minor versions
# (e.g. "3.10.4" would yield "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
135
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return whatever the selected opener's open() returns.

    *url*, *data* and *timeout* are passed straight through to the
    opener's open() method.

    If any of the keyword-only arguments *cafile*, *capath* or *cadefault*
    is true, a one-off opener with a certificate-verifying HTTPSHandler is
    built for this call: *cafile*/*capath* are loaded as verification
    locations when either is given, otherwise the SSL context's default
    verify paths are used.  That opener is not cached.

    Otherwise the module-level opener is used, and it is created with
    build_opener() on first use.

    Raises ValueError if certificate options are given but the ssl module
    could not be imported.
    """
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        # This branch is only entered when at least one CA option was given,
        # so certificate and host name verification are always required here.
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000161
def install_opener(opener):
    """Store *opener* in the module-level ``_opener`` slot read by urlopen()."""
    global _opener
    _opener = opener
165
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* to a file on disk.

    If *filename* is given the data is written there; otherwise a
    temporary file is created (and remembered so that urlcleanup() can
    delete it later).  *reporthook*, when given, is called once before
    the first read and once after every block with the block number,
    the block size and the total size (-1 when no Content-Length header
    is present).  *data* is handed to urlopen() unchanged.

    For a "file" URL with no *filename*, nothing is copied: the
    normalised local path is returned directly.

    Returns a ``(filename, headers)`` tuple.  Raises
    ContentTooShortError when fewer bytes than announced by
    Content-Length were received.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as response:
        headers = response.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: caller-supplied path or a tracked temp file.
        if filename:
            target = open(filename, 'wb')
        else:
            target = tempfile.NamedTemporaryFile(delete=False)
            filename = target.name
            _url_tempfiles.append(filename)

        with target:
            result = filename, headers
            block_size = 1024*8
            received = 0
            blocks_done = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])
            else:
                expected = -1

            if reporthook:
                reporthook(blocks_done, block_size, expected)

            while True:
                chunk = response.read(block_size)
                if not chunk:
                    break
                received += len(chunk)
                target.write(chunk)
                blocks_done += 1
                if reporthook:
                    reporthook(blocks_done, block_size, expected)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000229
def urlcleanup():
    """Remove the temporary files recorded by urlretrieve() and drop the
    cached module-level opener.

    Deletion is best effort: files that cannot be unlinked are ignored.
    """
    global _opener

    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # The file may already be gone or not removable; keep going.
            pass
    del _url_tempfiles[:]

    if _opener:
        _opener = None
241
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of
    ``request.full_url``; when that is empty, the request's "Host"
    header is used instead.  A trailing ``:port`` is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse(request.full_url)[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    without_port = _cut_port_re.sub("", netloc, 1)
    return without_port.lower()
259
class Request:
    """State of a single URL request: URL, optional body, and headers.

    Attributes set by the constructor:
      full_url          -- the URL with any ``<URL:...>`` wrapper and
                           ``#fragment`` removed
      fragment          -- the fragment split off the URL (or None)
      data              -- request body, or None
      headers           -- regular headers, keyed by capitalize()d name
      unredirected_hdrs -- headers that are not copied to redirects
      type/host/selector -- scheme, host and remaining part of the URL
      origin_req_host, unverifiable, method -- stored as given
    """

    # NOTE: the ``headers={}`` default is only iterated, never mutated, so
    # sharing it between calls is harmless; it is kept for compatibility.
    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            # Derived from full_url (falling back to the Host header).
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached if there is one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each of these warns with stacklevel=2 so that the DeprecationWarning
    # is attributed to the code calling the deprecated method rather than
    # to this module.

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Point this request at the proxy *host*.

        For an https request that has no tunnel host yet, the original
        host is remembered in ``_tunnel_host`` and the type/selector are
        left alone.  Otherwise the request type becomes *type* and the
        selector becomes the full URL.  In both cases ``host`` is
        replaced by the proxy host.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector is the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is stored in either header dict.

        The name is looked up exactly as given (no capitalization).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring regular over unredirected
        headers, or *default* when it is in neither.

        The name is looked up exactly as given (no capitalization).
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs; regular
        headers override unredirected ones with the same name."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
380
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are indexed by inspecting their method names, which follow
    the pattern ``<protocol>_<condition>``:
      <protocol>_open          -> handle_open[protocol]
      <protocol>_request       -> process_request[protocol]
      <protocol>_response      -> process_response[protocol]
      <protocol>_error_<kind>  -> handle_error[protocol][kind]
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every hook its method names describe.

        Raises TypeError if *handler* has no ``add_parent`` attribute.
        The handler is added to ``self.handlers`` (and gets this director
        as parent) only if at least one of its methods was registered.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            if i < 0:
                # No "<protocol>_<condition>" shape.  Without this guard
                # the -1 index would mis-slice the name, e.g. a method
                # called "open" would be registered for protocol "ope".
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    # numeric error kinds (HTTP status codes) are keyed as int
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep each chain ordered by the handlers' own ordering
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request object).

        Runs the ``<protocol>_request`` pre-processors, opens the request
        via _open(), then runs the ``<protocol>_response`` post-processors
        and returns the final response.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default' open chain, then the chain for req.type, then
        the 'unknown' chain; return the first truthy result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http'/'https' the third positional argument is the status
        code; ``http_error_<code>`` handlers are tried first and
        ``http_error_default`` handlers are the fallback.  For other
        protocols the ``<proto>_error`` chain is called.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = False
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
518
519# XXX probably also want an abstract factory that knows when it makes
520# sense to skip a superclass in favor of a subclass and when it might
521# make sense to include both
522
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Work out which defaults are replaced by a caller-supplied handler
    # (either a subclass or an instance of the default class).
    overridden = set()
    for default in default_classes:
        for candidate in handlers:
            if isclass(candidate):
                replaces = issubclass(candidate, default)
            else:
                replaces = isinstance(candidate, default)
            if replaces:
                overridden.add(default)

    # Install the surviving defaults first, in their declared order.
    for default in default_classes:
        if default not in overridden:
            opener.add_handler(default())

    # Then the caller's handlers, instantiating any that are classes.
    for supplied in handlers:
        if isclass(supplied):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
560
class BaseHandler:
    """Base class for handlers.

    ``handler_order`` is the value that ``__lt__`` compares between
    handlers.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
578
579
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes; otherwise return
        whatever the parent's error() dispatch produces."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
596
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        error = HTTPError(req.full_url, code, msg, hdrs, fp)
        raise error
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000600
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        safe_redirect = (code in (301, 302, 303, 307)
                         and method in ("GET", "HEAD"))
        post_redirect = code in (301, 302, 303) and method == "POST"
        if not (safe_redirect or post_redirect):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        # The new request carries no body, so drop the body-describing headers.
        kept_headers = {}
        for name, value in req.headers.items():
            if name.lower() not in CONTENT_HEADERS:
                kept_headers[name] = value
        return Request(newurl,
                       headers=kept_headers,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError for a disallowed
        target scheme or when the redirect limits are exceeded.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ("location", "uri"):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if not hasattr(req, 'redirect_dict'):
            seen = new.redirect_dict = req.redirect_dict = {}
        else:
            seen = new.redirect_dict = req.redirect_dict
            too_many_repeats = seen.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(seen) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        seen[newurl] = seen.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
703
704
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        path_start = remainder.find("/", 2)
        if path_start == -1:
            authority = remainder[2:]
        else:
            authority = remainder[2:path_start]
    else:
        # bare authority: whatever splittype took for a scheme is not one
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
776
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as a scheme -> URL mapping.

    For every scheme in the mapping a ``<scheme>_open`` attribute is
    installed on the instance that forwards to proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Bind the loop values as defaults so that each callable keeps
            # its own proxy URL and scheme.
            forward = (lambda r, proxy=proxy_url, type=scheme,
                       meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, forward)

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*.

        Returns None (leaving the request to other handlers) when the
        host is bypassed, when the proxy uses the request's own scheme,
        or when the request is https; otherwise re-opens the rewritten
        request through the parent.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user),
                                     unquote(password))
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type != proxy_type and orig_type != 'https':
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)

        # let other handlers take care of it
        return None
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000818
class HTTPPasswordMgr:
    """In-memory store of (user, password) pairs keyed by realm and URI.

    URIs are kept in "reduced" form, an ``(authority, path)`` tuple, and a
    stored entry applies to every URI at or below its path.
    """

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* and one URI or a sequence."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        # Store both the form with the scheme's default port filled in and
        # the form as given, so a lookup matches either spelling.
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) for *authuri*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.

        The comparison works on whole path segments: ``/foo`` covers
        ``/foo`` and ``/foo/bar`` but not ``/foobar``.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # A character-wise common prefix would treat '/foobar' as being
        # below '/foo'; require the match to end on a '/' boundary.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
881
882
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` realm.

    Credentials registered under realm ``None`` are used when nothing is
    stored for the requested realm.
    """

    def find_user_password(self, realm, authuri):
        """Look up *realm* first, then the default (``None``) realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
891
892
class AbstractBasicAuthHandler:
    """Shared implementation of HTTP Basic authentication retries.

    Concrete handlers supply ``auth_header`` (the request header to send
    the credentials in) and ``parent`` (the opener used to retry).
    """

    # XXX this allows for multiple auth-schemes, but only looks at the
    # first one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or a ',' rather
    # than on a leading '(?:.*,)*': that nested repetition backtracks
    # exponentially on a header made of many commas, and the header value
    # is controlled by the remote server.
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t]+)'    # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for lookups, or a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Reset the counter of consecutive authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials taken from the challenge.

        *authreq* is the name of the challenge header to read from
        *headers*.  Returns the retried response, or None when no usable
        Basic challenge or credentials are available.  Raises HTTPError
        after too many attempts and ValueError when the header's first
        scheme is not Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with credentials for *realm*, if any are known.

        Returns None when no password is stored, or when the same
        credentials are already present in the request's headers.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
960
961
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from the origin server with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* against the 'www-authenticate' challenge."""
        retried_response = self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
        # The attempt is over; start counting afresh for the next one.
        self.reset_retry_count()
        return retried_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000972
973
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from a proxy with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* against the 'proxy-authenticate' challenge."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        retried_response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        # The attempt is over; start counting afresh for the next one.
        self.reset_retry_count()
        return retried_response
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000988
989
# _randombytes(n) returns n random bytes; it is an alias for os.urandom.
_randombytes = os.urandom
992
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000993
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication retries.

    Concrete handlers supply ``auth_header`` (the request header to send
    the credentials in) and ``parent`` (the opener used to retry).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Reset the counter of consecutive authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* if *headers* carry a Digest challenge.

        Returns the retried response, or None when there is nothing to
        retry with.  Raises HTTPError after too many attempts and
        ValueError for a scheme that is neither Digest nor Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with a Digest header answering challenge *auth*.

        Returns None when no authorization can be computed or when the
        same header value is already present on the request.
        """
        # partition() tolerates a header that names the scheme but carries
        # no challenge; the empty challenge then yields no authorization.
        token, _, challenge = auth.partition(' ')
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for challenge dict *chal*.

        Returns None when the challenge lacks a realm or nonce, names an
        unsupported algorithm, or no user is known for the realm.  Raises
        URLError when the server offers a qop that does not include 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The challenge's qop is a list of options (RFC 2617 s. 3.2.1);
            # answer with 'auth' whenever the server offers it.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the ``(H, KD)`` hash helpers for *algorithm*.

        ``H`` is None for an algorithm other than 'MD5' or 'SHA', which
        get_authorization() treats as "cannot authenticate".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # Leaving H unassigned here would raise UnboundLocalError at
            # the return statement below.
            H = None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Return the entity digest for *data* (currently always None)."""
        # XXX not implemented yet
        return None
1132 return None
1133
1134
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* against the 'www-authenticate' challenge."""
        authority = urlparse(req.full_url).netloc
        outcome = self.http_error_auth_reqed(
            'www-authenticate', authority, req, headers)
        self.reset_retry_count()
        return outcome
1151
1152
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from a proxy with Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* against the 'proxy-authenticate' challenge."""
        outcome = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return outcome
1164
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending logic for HTTP(S) handlers."""

    def __init__(self, debuglevel=0):
        """Remember *debuglevel*; do_open() applies it to each connection."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Change the debug level used for subsequent connections."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the default headers of *request* and return it.

        Adds Content-type and Content-length for requests with data, a
        Host header, and the opener's default headers, each only when the
        request does not already carry it.

        Raises URLError when the request has no host, TypeError for str
        data, and ValueError for iterable data without a Content-length.
        """
        # collections.abc is the home of the ABCs; the bare
        # collections.Iterable alias is gone in newer Python versions.
        from collections.abc import Iterable

        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # Not a buffer: the length cannot be computed here.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # With a proxy the selector is the full URL; take the Host
            # header value from it.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.

        Raises URLError when the request has no host or when sending it
        fails with a socket error.  The connection is closed if sending
        the request or reading the response fails.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Whatever went wrong, do not leave the connection open.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # Put the reason phrase into the response's .msg attribute,
        # because urllib clients expect to find the reason there.  It
        # would be good to mark this attribute as deprecated and get
        # clients to use info() or .headers for the headers instead.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001273
1274
class HTTPHandler(AbstractHTTPHandler):
    """Open ``http`` URLs over an http.client.HTTPConnection."""

    # Pre-processing of http requests is the shared do_request_ logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* and return the response."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1281
# HTTPSHandler only exists when http.client provides HTTPSConnection.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open ``https`` URLs over an http.client.HTTPSConnection."""

        # Pre-processing of https requests is the shared do_request_ logic.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Keep *context* and *check_hostname* for each connection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* and return the response."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001298
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar if None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic gets exactly the same treatment
    https_request = http_request
    https_response = http_response
1316
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open() rejects the request's URL scheme."""

    def unknown_open(self, req):
        """Always raise URLError naming the scheme of *req*."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001321
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    surrounding double quotes removed from the value.  An empty value
    (``key=``) is kept as the empty string.  Raises ValueError for an
    element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexing: v may be empty, and v[0] would then
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1331
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.

    Returns the list of elements with surrounding whitespace stripped.
    """
    items = []
    current = []            # characters of the element being collected
    in_quotes = False
    escaped = False         # previous character was a backslash in quotes

    for ch in s:
        if escaped:
            # take the escaped character literally
            escaped = False
            current.append(ch)
        elif in_quotes:
            if ch == '\\':
                # the backslash itself is not kept
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # an unquoted comma ends the current element
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1374
class FileHandler(BaseHandler):
    """Open ``file:`` URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open the local file named by *req*.

        Raises URLError when the URL names a host that is neither
        'localhost' nor one of the addresses from get_names().  For a
        selector of the form '//host/...' whose host is one of those
        addresses nothing is opened and None is returned.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: an identity comparison between the host
            # string and the names tuple could never succeed.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses regarded as the local host.

        The result is computed once and cached on the class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file, with synthesized headers.

        Raises URLError when the file cannot be accessed or when the
        request's host does not resolve to this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001426
def _safe_gethostbyname(host):
    """Return the address socket.gethostbyname() gives for *host*.

    Returns None instead of raising when the lookup fails with
    socket.gaierror.
    """
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        address = None
    return address
1432
class FTPHandler(BaseHandler):
    """Open ``ftp`` URLs through an ftpwrapper connection."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl wrapping the data.  Raises URLError when
        the request has no host, the host cannot be resolved, or the FTP
        transfer fails.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        # the last segment is the file name, the rest are directories
        filename = segments.pop()
        dirs = segments
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I' when a file is named, 'D' otherwise; a ';type=' attribute
            # in the URL overrides the choice.
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(filename, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string(''.join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new, non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001490
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps connections open and reuses them.

    Connections are cached per (user, host, port, directory, timeout) and
    dropped once they have been idle for ``delay`` seconds or when the
    cache reaches ``max_conns`` entries.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # earliest expiry time among cached entries
        self.delay = 60         # seconds a connection stays cached
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds a cached connection is kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which a connection gets evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for the target, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def _soonest_expiry(self):
        """Return the earliest expiry time, or 0 when nothing is cached."""
        # min() of an empty sequence raises ValueError, so guard for the
        # case where every entry has just been dropped.
        if not self.timeout:
            return 0
        return min(self.timeout.values())

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._soonest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection, as is done for
                    # expired ones, instead of just dropping it.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._soonest_expiry()

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1543
1544
# Code moved from the old urllib module
1546
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL into a file system path.

        OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path into a relative 'file' scheme URL.

        OS-specific; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001562
1563# This really consists of two pieces:
1564# (1) a class which handles opening of all sorts of URLs
1565# (plus assorted utilities etc.)
1566# (2) a set of functions for parsing URLs
1567# XXX Should these be separated out into different modules?
1568
1569
ftpcache = {}


class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and default headers.

        proxies must be a mapping of URL scheme to proxy URL; when None
        the result of getproxies() is used.  The optional keyword
        arguments key_file and cert_file are stored for HTTPS use.
        Instantiation emits a DeprecationWarning.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release resources held by the opener (see cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files made by retrieve() and clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the open_<scheme>() method matching the URL scheme
        ('-' in the scheme is mapped to '_'; a missing scheme means
        'file').  When a proxy is configured for the scheme, the handler
        of the proxy's scheme receives a (proxyhost, fullurl) tuple
        instead of a string.  Unknown schemes go to open_unknown() or
        open_unknown_proxy().  HTTPError and URLError propagate
        unchanged; other socket errors are re-raised as
        IOError('socket error', ...).
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            # Listed first so these are not caught by the
            # socket.error clause below.
            raise
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL through an unknown proxy type."""
        urltype, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        If filename is given the content is written there, otherwise to
        a temporary file that cleanup() removes later.  reporthook, when
        given, is called as reporthook(blocknum, blocksize, totalsize)
        once before the first read and after every block; totalsize is
        -1 when there is no Content-Length header.  Raises
        ContentTooShortError when fewer bytes than announced arrive.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a plain local file; fall through to the
                # generic open() path below.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Derive the temp file suffix from the URL path only.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # User-supplied headers are applied last and so override the above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection using the configured key/cert files."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl wrapping the opened file with synthesized
        Content-Type, Content-Length and Last-modified headers.  Raises
        URLError when the file cannot be stat()ed or the URL names a
        host that is not the local machine.
        """
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by
        (user, host, port, directory path); once the cache grows beyond
        MAXFTPCACHE entries every connection except the one needed now
        is closed and dropped.  FTP errors are re-raised as URLError.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a
            # snapshot of the keys: deleting from a dict while iterating
            # its live keys() view raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file
            # name was given, binary ("image") otherwise.
            if not file: ftptype = 'D'
            else: ftptype = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    ftptype = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, ftptype)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        Any POSTed data is ignored.  The returned addinfourl wraps a
        text stream holding the generated headers followed by the
        decoded payload.  Raises IOError for a URL without a comma.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without "=" is the encoding marker
        # (e.g. ";base64"), not a media type parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002015
2016
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}   # "realm@host" -> (user, passwd)
        self.tries = 0         # redirects followed for the current request
        self.maxtries = 10     # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless self.maxtries redirects have already
        been followed, in which case the response is reported as a 500
        "Redirect Recursion" error through http_error_500 (if defined)
        or http_error_default.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when the redirect raises; otherwise the stale
            # count would make later, unrelated requests hit the
            # recursion limit early.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the Location or URI header.

        Returns None when neither header is present.  Raises HTTPError
        when the target scheme is not http, https, ftp or empty.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each URLopener.http_error_default() call below raises
        # HTTPError, so execution only continues past a check when the
        # check passes.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # As in http_error_401, URLopener.http_error_default() raises.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Returns None when no user name or password could be obtained."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # passing it on makes get_user_passwd() discard the cached pair.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Returns None when no user name or password could be obtained."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:password embedded in the host.

        Returns None when no user name or password could be obtained."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:password embedded in the host.

        Returns None when no user name or password could be obtained."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Cached credentials are reused unless clear_cache is true, in
        which case the cached entry is dropped and the user is asked
        again.  A non-empty answer is stored in self.auth_cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2225
2226
2227# Utility functions
2228
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed on the first call only; the result is kept
    in the module-level _localhost."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2236
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The addresses are resolved on the first call only and kept as a
    tuple in the module-level _thishost."""
    global _thishost
    if _thishost is not None:
        return _thishost
    hostname = socket.gethostname()
    addresses = socket.gethostbyname_ex(hostname)[2]
    _thishost = tuple(addresses)
    return _thishost
2244
_ftperrors = None
def ftperrors():
    """Return the exception classes that the FTP class may raise.

    ftplib is imported on first use only; its all_errors value is then
    cached in the module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
2253
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The object is built once by parsing an empty string and is then
    cached in the module-level _noheaders, so every call returns the
    same instance.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2261
2262
2263# Utility classes
2264
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Remember the connection parameters and log in immediately.

        dirs is the sequence of directories to change into after login.
        persistent becomes self.keepalive: while it is true the
        connection stays open after the last handed-out file is closed.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet closed.
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Do not leave a half-open control connection behind when
            # connecting, logging in or changing directory fails.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change into the configured directories."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing).

        type is the FTP transfer type; 'd' or 'D' requests a directory
        listing.  A RETR that is refused with a 550 reply falls back to a
        LIST of the same name.

        Returns (ftpobj, retrlen): ftpobj is the data connection's binary
        file object wrapped by addclosehook with self.file_close as the
        hook, retrlen is the length reported by ntransfercmd().

        Raises URLError for permanent FTP errors other than the 550
        fallback case, and when the directory to list cannot be entered.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Reconnect and retry the command once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    # %r, not %d: reason is an exception, not a number.
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy with a transfer."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for handed-out files: drop one reference and close
        the connection when none remain and it is not kept alive."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP-level errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2354
2355# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  If you need
    a different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variables whose name ends in lowercase '_proxy' take precedence over
    other spellings of the same name; an empty one removes the entry.
    When REQUEST_METHOD is present in the environment (i.e. we appear to
    be running as a CGI script) a non-lowercase HTTP_PROXY is ignored,
    because a web server may derive it from a client-supplied "Proxy:"
    request header.
    """
    proxies = {}
    # First pass: accept the variable whatever its letter case.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # Under CGI, forget what HTTP_PROXY contributed; a genuine lowercase
    # http_proxy is put back by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase variables override whatever was found above.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            scheme = name[:-6].lower()
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2371
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    A suffix matches the host itself and any sub-domain of it; the
    comparison is case-insensitive and respects label boundaries, so
    'example.com' matches 'www.example.com' but not 'notexample.com'.
    Leading dots in a suffix are ignored.  The host is tested both with
    and without its port.  Returns 1 to bypass the proxy, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        if hostonly == name or host == name:
            return 1
        # Only accept a suffix that starts on a domain-label boundary.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
2391
2392
# This code tests an OSX specific data structure but is testable on all
# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }

    Exceptions that start with a dotted number are treated as IPv4
    networks (with an optional /prefix length) and compared against the
    resolved address of the host; anything else is matched against the
    host with fnmatch.  Network entries whose prefix length falls outside
    0..32 are skipped.
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly abbreviated) dotted quad into one integer;
        # missing trailing groups count as 0, surplus groups are dropped.
        parts = list(map(int, ipAddr.split('.')))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            if hostIP is None:
                # Resolve the host lazily, the first time it is needed.
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix: 8 bits per dotted group given.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])

            if mask < 0 or mask > 32:
                # An out-of-range prefix would give a negative shift
                # count below (ValueError); ignore such entries.
                continue

            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
2453
2454
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002455if sys.platform == 'darwin':
Ronald Oussoren84151202010-04-18 20:46:11 +00002456 from _scproxy import _get_proxy_settings, _get_proxies
2457
def proxy_bypass_macosx_sysconf(host):
    """Return the verdict of _proxy_bypass_macosx_sysconf() for *host*,
    using the settings currently reported by _get_proxy_settings()."""
    return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())
Ronald Oussoren84151202010-04-18 20:46:11 +00002461
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The mapping is whatever _get_proxies() reports, i.e. the proxy
    information held by the MacOSX SystemConfiguration framework.
    """
    proxies = _get_proxies()
    return proxies
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002469
Ronald Oussoren84151202010-04-18 20:46:11 +00002470
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002471
def proxy_bypass(host):
    """Tell whether *host* should be reached without a proxy.

    When proxies are configured through the environment, the no_proxy
    environment setting decides; otherwise the MacOSX system
    configuration is consulted.
    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    return proxy_bypass_macosx_sysconf(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002477
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings found in the environment win; the MacOSX system
    configuration is consulted only when the environment defines none.
    """
    env_proxies = getproxies_environment()
    if env_proxies:
        return env_proxies
    return getproxies_macosx_sysconf()
2480
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002481
2482elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    Reads ProxyEnable and ProxyServer from the current user's
    "Internet Settings" key.  An empty dictionary is returned when
    winreg cannot be imported or the key cannot be read; entries parsed
    before a malformed value was met are kept.
    """
    proxies = {}
    try:
        import winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        # The with block closes the registry handle on every exit path,
        # including when a value is missing or malformed.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
    except (OSError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
2527
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Settings gathered from the environment are returned when there are
    any; the registry is consulted only otherwise.
    """
    env_proxies = getproxies_environment()
    if env_proxies:
        return env_proxies
    return getproxies_registry()
2536
def proxy_bypass_registry(host):
    """Test, per the Windows registry, if *host* should bypass the proxy.

    Reads ProxyEnable and ProxyOverride from the current user's
    "Internet Settings" key.  ProxyOverride is a ';'-separated list of
    glob patterns ('*' and '?' are wildcards) plus the special entry
    '<local>', which matches host names that contain no dot.  Each
    pattern is compared, case-insensitively and against the whole name,
    with the host as given, its resolved IP address and its fully
    qualified name.  Empty entries are ignored.

    Returns 1 to bypass the proxy and 0 otherwise, including when winreg
    is unavailable or the registry cannot be read.
    """
    try:
        import winreg
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        # The with block makes sure the registry handle is closed.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
    except OSError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    candidates = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            candidates.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            candidates.append(fqdn)
    except socket.error:
        pass
    # now check if we match one of the registry values.
    for test in proxyOverride.split(';'):
        test = test.strip()
        if not test:
            # An empty entry (e.g. from a trailing ';') would otherwise
            # turn into a pattern that matches every host.
            continue
        if test == '<local>':
            if '.' not in rawHost:
                return 1
            continue
        # Escape everything, then turn the two glob wildcards back into
        # their regex equivalents and anchor the pattern at the end.
        pattern = re.escape(test).replace(r'\*', '.*').replace(r'\?', '.')
        pattern += r'\Z'
        for val in candidates:
            if re.match(pattern, val, re.I):
                return 1
    return 0
2586
def proxy_bypass(host):
    """Tell whether *host* should be reached without a proxy.

    When proxies are configured through the environment, the no_proxy
    environment setting decides; otherwise the registry settings are
    consulted.
    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    return proxy_bypass_registry(host)
2598
2599else:
2600 # By default use environment variables
2601 getproxies = getproxies_environment
2602 proxy_bypass = proxy_bypass_environment