blob: 17c92514d78068ee8ca8da1ed720f116fc9d28dc [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
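BaseHandler -- Base class for handlers.  Stores the parent
OpenerDirector and orders handlers by their handler_order attribute.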
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't recognize the hash
# algorithm requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100106 splitattr, splitquery, splitvalue, splittag, to_bytes,
107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# check for SSL
# The ssl module is optional; record whether it could be imported so that
# HTTPS-specific code paths can be gated on this flag.
try:
    import ssl
    _have_ssl = True
except ImportError:
    _have_ssl = False
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000117
# Public API of this module: the names exported by
# ``from urllib.request import *``.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
    'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
133
# used in User-Agent header sent
# Built from sys.version_info rather than by slicing sys.version: the slice
# sys.version[:3] truncates two-digit minor versions ("3.10.4" -> "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
136
# Module-wide default opener; created lazily by urlopen() or set explicitly
# through install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False):
    """Open *url* and return the response produced by the opener.

    *url*, *data* and *timeout* are passed straight to the opener's
    ``open()`` method.

    When none of *cafile*, *capath* or *cadefault* is truthy, the shared
    module-level opener is used (it is built with build_opener() on first
    use).  Otherwise a one-off opener with a certificate-verifying
    HTTPSHandler is built for this call only and is not cached.

    Raises ValueError if certificate options are given but the ssl module
    is unavailable.
    """
    global _opener

    if not (cafile or capath or cadefault):
        # Plain request: reuse the cached opener, creating it if needed.
        opener = _opener
        if opener is None:
            _opener = opener = build_opener()
        return opener.open(url, data, timeout)

    # Certificate verification requested: build a dedicated SSL context.
    if not _have_ssl:
        raise ValueError('SSL support not available')
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.options |= ssl.OP_NO_SSLv2
    context.verify_mode = ssl.CERT_REQUIRED
    if cafile or capath:
        context.load_verify_locations(cafile, capath)
    else:
        context.set_default_verify_paths()
    https_handler = HTTPSHandler(context=context, check_hostname=True)
    opener = build_opener(https_handler)
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000158
def install_opener(opener):
    """Make *opener* the module-wide default opener used by urlopen()."""
    # Rebind the module-level name directly in the module namespace.
    globals()['_opener'] = opener
162
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* into a file on disk.

    If *filename* is given the data is written there; otherwise a named
    temporary file is created and remembered so urlcleanup() can delete
    it.  For a ``file:`` URL without *filename* nothing is copied and the
    normalized local path is returned directly.

    *reporthook*, if given, is called once before the first read and once
    after every block with (block number, block size, total size); the
    total size is -1 when the response has no Content-Length header.
    *data* is passed through to urlopen().

    Returns a ``(path, headers)`` tuple.  Raises ContentTooShortError when
    fewer bytes arrive than Content-Length announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as source:
        headers = source.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: caller-supplied path or a tracked temp file.
        if filename:
            sink = open(filename, 'wb')
        else:
            sink = tempfile.NamedTemporaryFile(delete=False)
            filename = sink.name
            _url_tempfiles.append(filename)

        with sink:
            result = filename, headers
            chunk_size = 1024*8
            expected = -1
            received = 0
            blocknum = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])

            if reporthook:
                reporthook(blocknum, chunk_size, expected)

            # Copy block by block until the source is exhausted.
            block = source.read(chunk_size)
            while block:
                received += len(block)
                sink.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, chunk_size, expected)
                block = source.read(chunk_size)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000226
def urlcleanup():
    """Remove temporary files left by urlretrieve() and forget the opener.

    Files that cannot be deleted (OSError) are silently skipped.  The
    tracking list is emptied in place, and a truthy module-level opener is
    reset to None.
    """
    global _opener

    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except OSError:
            # Best effort: the file may already be gone.
            continue

    _url_tempfiles.clear()
    if _opener:
        _opener = None
238
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is taken from the network-location part of the request URL,
    falling back to the request's "Host" header when the URL has none.
    A trailing ``:port`` is stripped.

    Variation from the RFC: the result is lowercased so that callers can
    compare hosts directly.
    """
    netloc = urlparse(request.full_url).netloc
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return _cut_port_re.sub("", netloc, 1).lower()
256
class Request:
    """The state of one URL request: URL, payload, headers and method.

    Header names are normalized with ``str.capitalize()`` when added, so
    lookups through has_header()/get_header()/remove_header() must use that
    form (e.g. "Content-length").
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        # The fragment is split off and kept separately.
        self.full_url, self.fragment = splittag(unwrap(url))
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        # Default the origin host to the host this request targets.
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    @property
    def data(self):
        """The request payload, or None when there is none."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # A Content-length header was most probably computed for the
            # previous payload, so drop it whenever the payload changes.
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split full_url into type, host and selector attributes.

        Raises ValueError when the URL has no scheme.
        """
        scheme, remainder = splittype(self.full_url)
        self.type = scheme
        if scheme is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        host, self.selector = splithost(remainder)
        self.host = unquote(host) if host else host

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        # No explicit method: infer it from the presence of a payload.
        return "GET" if self.data is None else "POST"

    def get_full_url(self):
        """Return the URL, with the fragment re-attached if there is one."""
        if not self.fragment:
            return self.full_url
        return '%s#%s' % (self.full_url, self.fragment)

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host*.

        An https request without a tunnel host yet records the current
        host as the tunnel host; any other request switches to the proxy's
        *type* and uses the full URL as selector.
        """
        needs_tunnel = self.type == 'https' and not self._tunnel_host
        if needs_tunnel:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector is the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Set a header that is also carried over to redirected requests."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set in either header mapping."""
        return any(header_name in store
                   for store in (self.headers, self.unredirected_hdrs))

    def get_header(self, header_name, default=None):
        """Return the header's value; regular headers take precedence."""
        if header_name in self.headers:
            return self.headers[header_name]
        return self.unredirected_hdrs.get(header_name, default)

    def remove_header(self, header_name):
        """Drop *header_name* from both header mappings, if present."""
        for store in (self.headers, self.unredirected_hdrs):
            store.pop(header_name, None)

    def header_items(self):
        """Return all headers as (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        merged = dict(self.unredirected_hdrs)
        merged.update(self.headers)
        return list(merged.items())
356
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are discovered by method name in add_handler():

    * ``<protocol>_open``             -- handle_open[protocol]
    * ``<protocol>_request``          -- process_request[protocol]
    * ``<protocol>_response``         -- process_response[protocol]
    * ``<protocol>_error_<kind>``     -- handle_error[protocol][kind]

    Each list of handlers is kept sorted (handlers must be orderable).
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every protocol/condition it implements.

        Raises TypeError if *handler* has no ``add_parent`` attribute.  The
        handler is added to ``self.handlers`` and given this director as
        parent only if at least one of its methods was registered.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            if i < 1:
                # No "<protocol>_" prefix.  Without this guard find()
                # returning -1 makes a method called e.g. "open" register
                # under the bogus protocol "ope", and "_open" registers
                # under the empty protocol.
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is whatever follows the
                # second underscore, as an int when it parses as one.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in chain[kind] until one answers.

        Returns the first result that is not None, or None if every
        handler declined (or none is registered for *kind*).
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        for handler in chain.get(kind, ()):
            result = getattr(handler, meth_name)(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request-like object).

        The request runs through the ``<protocol>_request`` processors,
        then the open chain, then the ``<protocol>_response`` processors;
        the final response is returned.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, meth_name)(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, meth_name)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http'/'https' the third positional argument is the status
        code; handlers named ``http_error_<code>`` are tried first and
        ``http_error_default`` handlers are the fallback.  For any other
        protocol the ``<proto>_error`` handlers are tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # https is not different than http
            lookup = self.handle_error['http']
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = False

        result = self._call_chain(lookup, proto, meth_name, *args)
        if result:
            return result

        if http_err:
            return self._call_chain(lookup, 'default', 'http_error_default',
                                    *orig_args)
494
495# XXX probably also want an abstract factory that knows when it makes
496# sense to skip a superclass in favor of a subclass and when it might
497# make sense to include both
498
def build_opener(*handlers):
    """Create an OpenerDirector populated with default and given handlers.

    *handlers* may be handler instances or handler classes; classes are
    instantiated with no arguments.  The default handlers (HTTPS included
    only when http.client provides HTTPSConnection) are added first, except
    for any default that one of *handlers* is an instance or subclass of.
    """
    def _is_class(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def _replaces(candidate, default):
        # A class replaces a default it derives from; an instance replaces
        # a default it is an instance of.
        if _is_class(candidate):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    director = OpenerDirector()
    defaults = [ProxyHandler, UnknownHandler, HTTPHandler,
                HTTPDefaultErrorHandler, HTTPRedirectHandler,
                FTPHandler, FileHandler, HTTPErrorProcessor,
                DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        defaults.append(HTTPSHandler)

    # Every (default, handler) pair is checked, as a list, before filtering.
    overridden = set()
    for default in defaults:
        if any([_replaces(candidate, default) for candidate in handlers]):
            overridden.add(default)

    for default in defaults:
        if default not in overridden:
            director.add_handler(default())

    for candidate in handlers:
        instance = candidate() if _is_class(candidate) else candidate
        director.add_handler(instance)
    return director
537
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Sort key: handlers with a lower value are placed earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as the object this handler belongs to."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
555
556
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* if it is a 2xx, else the parent's error result."""
        status = response.code
        reason = response.msg
        headers = response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= status < 300:
            return response

        return self.parent.error(
            'http', request, response, status, reason, headers)

    https_response = http_response
573
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback error handler: report the HTTP error as an exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError carrying the request URL and the response."""
        failure = HTTPError(req.full_url, code, msg, hdrs, fp)
        raise failure
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000577
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Prefix of the HTTPError message raised when a redirect loop is detected.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request carries no payload, so headers describing the
        # old payload are not copied over.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the redirect target.

        Returns None when the response names no target or when
        redirect_request() declines.  Raises HTTPError for a disallowed
        target scheme or when a redirect loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # Only supply the missing "/" when the target names a host.  A
        # relative target with an empty path (for example "?page=2") must
        # be left alone so that urljoin() resolves it against the current
        # URL's path instead of against the site root.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
680
681
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    A URL must carry an authority component, i.e. (per RFC 3986) two
    slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Bare authorities:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    Authorities with userinfo (assumed to be username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    The same as URLs:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Anything after the authority is dropped:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    A URL without a trailing '/':

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("//"):
        # URL with authority.  For RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), the path is empty or starts with '/', so the
        # authority ends at the next slash, if there is one.
        slash = remainder.find("/", 2)
        authority = remainder[2:] if slash == -1 else remainder[2:slash]
    elif remainder.startswith("/"):
        raise ValueError("proxy URL with no authority: %r" % proxy)
    else:
        # Bare authority: whatever looked like a scheme was not one.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        return scheme, None, None, hostport
    user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
753
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as a scheme -> URL mapping."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install a ``<scheme>_open`` method for every configured proxy.

        When *proxies* is None the mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Defaults bind the current loop values into each callable.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=proxy_url, type=scheme,
                    meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, unless its host bypasses proxies.

        Returns None to let other handlers process the (possibly
        rewritten) request, or the response of re-opening it through the
        parent when the proxy uses a different URL type.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        proxy_type = orig_type if proxy_type is None else proxy_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Credentials embedded in the proxy URL become a Basic
            # Proxy-authorization header.
            user_pass = '%s:%s' % (unquote(user), unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)

        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000795
class HTTPPasswordMgr:
    """In-memory store of (user, password) pairs keyed by realm and URI."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of URIs.

        Every entry is stored twice: once with the scheme's default port
        filled in and once as given, so a lookup matches either spelling.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *realm* and *authuri*.

        Returns (None, None) when no stored URI equals *authuri* or is a
        parent of it.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain character prefix would let
        # credentials for "/foo" match the unrelated sibling "/foobar".
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
858
859
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; if no user is found, retry with realm None."""
        credentials = HTTPPasswordMgr.find_user_password(self, realm,
                                                         authuri)
        if credentials[0] is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        found_user, found_password = credentials
        return found_user, found_password
868
869
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Subclasses supply ``auth_header`` (the request header carrying the
    credentials) and a ``parent`` opener used to re-send the request.
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored on the start of the header or on a ','
    # separator.  The previous form, '(?:.*,)*...', nested two unbounded
    # repetitions and backtracked exponentially on headers made of many
    # commas, letting a server stall the client (CVE-2020-8492).
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme, e.g. "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for lookups, or a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Reset the counter of authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in the *authreq* response header.

        Returns the response of the retried request, or None when there
        is no usable Basic challenge or no credentials are known.
        Raises HTTPError once the retry counter exceeds 5 and ValueError
        when the header starts with a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                # Keep the last challenge that carries a realm, which is
                # what the greedy pattern used previously selected.
                mo = None
                for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
                    pass
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with Basic credentials for *realm*, if any are known.

        Returns None when no password is stored or when the request
        already carries exactly these credentials (they were rejected).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
937
938
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses by retrying with an Authorization header."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle the 'www-authenticate' challenge for the request's URL."""
        result = self.http_error_auth_reqed('www-authenticate',
                                            req.full_url, req, headers)
        self.reset_retry_count()
        return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000949
950
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses by retrying with a Proxy-authorization header."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle the 'proxy-authenticate' challenge for the request's host."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            req.host, req, headers)
        self.reset_retry_count()
        return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965
966
Senthil Kumaran6c5bd402011-11-01 23:20:31 +0800967# Return n random bytes.
968_randombytes = os.urandom
969
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000970
class AbstractDigestAuthHandler:
    """Shared logic for answering Digest authentication challenges.

    Subclasses supply ``auth_header`` and a ``parent`` opener used to
    re-send the request.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as the password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Reset the counter of authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in the *auth_header* response header.

        Returns the retried response for a Digest challenge and None for
        a Basic challenge or a missing header.  Raises HTTPError once the
        retry counter exceeds 5 and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with a Digest header computed from challenge *auth*.

        Returns None when no authorization can be computed or when the
        request already carries exactly that header value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce derived from *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for challenge dict *chal*.

        Returns None when the challenge lacks 'realm' or 'nonce', names an
        unsupported algorithm, or no user is known for the realm.  Raises
        URLError for a qop value other than 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # The nonce count restarts whenever the server issues a new nonce.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        H is None when the algorithm is not supported; get_authorization
        checks for that.  Previously H was left unbound in that case and
        the call raised UnboundLocalError.
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
1110
1111
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 401 responses using HTTP Digest authentication.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle the 'www-authenticate' challenge for the request's authority."""
        authority = urlparse(req.full_url)[1]
        outcome = self.http_error_auth_reqed('www-authenticate',
                                             authority, req, headers)
        self.reset_retry_count()
        return outcome
1128
1129
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using HTTP Digest authentication."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle the 'proxy-authenticate' challenge for the request's host."""
        outcome = self.http_error_auth_reqed('proxy-authenticate',
                                             req.host, req, headers)
        self.reset_retry_count()
        return outcome
1141
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-like handlers."""

    def __init__(self, debuglevel=0):
        """Remember *debuglevel*; nothing in this class reads it back."""
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Replace the stored debug level."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers a request needs before it is sent.

        Adds Content-type and Content-length for requests with data, a
        Host header, and the parent opener's default headers, each only
        when the request does not already have it.  Returns *request*.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable without
        a buffer interface and no Content-length was supplied.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # The ABCs live in collections.abc; the alias
                    # collections.Iterable was removed in Python 3.10.
                    import collections.abc
                    if isinstance(data, collections.abc.Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is a full URL; take the Host
            # value from it rather than from request.host.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        Raises URLError when the request has no host or when sending the
        request fails with OSError.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)

        # Unredirected headers take precedence over regular ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Close the connection on any failure, including one raised
            # while reading the response, so the socket is not leaked.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # Overwrite the response's .msg attribute with the reason phrase,
        # because urllib clients expect to find the reason in .msg.  It
        # would be good to deprecate this attribute and have clients use
        # info() or .headers for the headers instead.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001250
1251
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs over http.client.HTTPConnection."""

    # Request pre-processing is the shared implementation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* and return the response produced by do_open()."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1258
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs over http.client.HTTPSConnection."""

        # Request pre-processing is the shared implementation.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Store the options forwarded to every HTTPSConnection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req*, passing the stored context and check_hostname."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001275
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1293
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming the request's URL type."""

    def unknown_open(self, req):
        """Always raise URLError('unknown url type: <type>')."""
        message = 'unknown url type: %s' % req.type
        raise URLError(message)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001298
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value wrapped in double quotes has the quotes removed.  An empty
    value (e.g. 'opaque=') is kept as the empty string; indexing it
    directly used to raise IndexError.  An element without '=' raises
    ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes so an empty value cannot fail.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1308
def parse_http_list(s):
    """Split a comma-separated header value into its elements.

    Parses lists as described by RFC 2068 Section 2.  Commas inside a
    double-quoted string do not split, and the quotes stay in the
    element.  Inside a quoted string a backslash is dropped and the
    character after it is kept literally.  Only double-quotes count, not
    single-quotes.  Each element is returned stripped of surrounding
    whitespace.
    """
    elements = []
    current = []
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character following a backslash inside a quoted string.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
1351
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* as a local file unless it names a foreign host.

        Raises URLError when the URL carries a host other than
        'localhost' that is not among get_names().  When it carries a host
        that is among get_names(), this method returns None without opening
        the file (behaviour kept from the original).
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test; the former identity comparison against the
            # tuple ('is') could never succeed, so every such host was
            # rejected.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return the addresses of this machine, cached on the class."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The headers carry a guessed Content-type, the file size and its
        modification time.  Raises URLError when the file cannot be
        accessed or when the URL's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # 'port' is only bound when a host was given; the 'not host'
            # test short-circuits before it is read otherwise.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001403
def _safe_gethostbyname(host):
    """Return socket.gethostbyname(host), or None on socket.gaierror."""
    address = None
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        pass
    return address
1409
class FTPHandler(BaseHandler):
    """Open ftp: URLs through a new ftpwrapper connection per request."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by *req* over FTP.

        Returns an addinfourl whose headers carry the guessed
        Content-type and, when known, the Content-length.  Raises
        URLError when no host is given, when the host cannot be resolved,
        or when ftplib reports an error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        # Last path segment is the file; the ones before it are directories.
        *dirs, file = [unquote(segment) for segment in path.split('/')]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise,
            # unless a ';type=' attribute says differently.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001467
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a timed cache."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # connection key -> ftpwrapper
        self.timeout = {}    # connection key -> expiry time (time.time())
        self.soonest = 0     # earliest expiry among cached connections
        self.delay = 60      # seconds added to 'now' on every use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how many seconds a connection stays cached after a use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for this endpoint, creating it if new."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # min() of an empty sequence raises ValueError; fall back to the
        # initial value when nothing is cached any more.
        self.soonest = min(self.timeout.values()) if self.timeout else 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close before dropping, as done for expired entries;
                    # otherwise the evicted connection stays open.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values()) if self.timeout else 0

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1520
class DataHandler(BaseHandler):
    """Open data: URLs by decoding the payload embedded in the URL."""

    def data_open(self, req):
        """Return an addinfourl over the bytes encoded in the data URL."""
        # data URLs as specified in RFC 2397.
        #
        # ignores POSTed data
        #
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        full_url = req.full_url

        scheme, remainder = full_url.split(":", 1)
        mediatype, encoded = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(encoded)
        base64_marker = ";base64"
        if mediatype.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(base64_marker)]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"

        header_text = ("Content-type: %s\nContent-length: %d\n" %
                       (mediatype, len(payload)))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, full_url)
1550
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001551
# Code moved from the old urllib module
1553
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Path <-> URL helpers: Windows gets its own implementation, every other
# platform uses plain percent-(un)quoting.
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL to a file system path
        by percent-decoding it; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to the path part of a 'file' URL
        by percent-encoding it; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001569
1570# This really consists of two pieces:
1571# (1) a class which handles opening of all sorts of URLs
1572# (plus assorted utilities etc.)
1573# (2) a set of functions for parsing URLs
1574# XXX Should these be separated out into different modules?
1575
1576
# Module-wide cache of open FTP connections, shared by all URLopener
# instances unless an instance rebinds its .ftpcache attribute.
ftpcache = {}


class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level defaults so that cleanup(), which runs from __del__,
    # still works on an instance whose __init__ raised before assigning
    # the instance attributes.
    __tempfiles = None
    tempcache = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Create an opener.

        proxies -- mapping of URL scheme to proxy URL; when None the
                   result of getproxies() is used.
        x509    -- optional 'key_file' and 'cert_file' keyword arguments,
                   passed on to HTTPSConnection by open_https().

        Emits a DeprecationWarning on every instantiation.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release resources held by the opener (delegates to cleanup())."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files created by retrieve() and empty the
        tempcache, if one is in use."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method named open_<scheme> (with '-' replaced
        by '_').  When a proxy is configured for the scheme, the proxy's
        scheme selects the method and the url passed on is the pair
        (proxy host, full url).  Unknown schemes go to open_unknown() or
        open_unknown_proxy().

        HTTPError and URLError propagate unchanged; any other OSError is
        re-raised as OSError('socket error', original).
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise OSError('url error', 'unknown url type', urltype)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        urltype, url = splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % urltype, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        When filename is not given, the data is written to a temporary
        file that cleanup() removes later.  reporthook, if given, is
        called as reporthook(blocknum, blocksize, totalsize) once before
        the first read and once after every block; totalsize is -1 when
        the response carries no Content-Length header.

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except OSError:
                # Not usable as a plain local file; fall through to the
                # generic open() path below.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                # Keep the URL's extension on the temporary file.
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.

        Returns an addinfourl for a 2xx response; any other status is
        handed to http_error().  Raises OSError when no host can be
        determined and URLError on a bad status line.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: open() passed (proxy host, full target url).
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Strip the credentials from the selector sent on.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers registered on the opener override the ones set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using the opener's
            key_file and cert_file."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl over the file opened in binary mode, with
        Content-Type, Content-Length and Last-modified headers built from
        os.stat() and mimetypes.  Raises URLError when the file cannot be
        stat'ed or when the URL names a host that is not this machine.
        """
        import email.utils
        import mimetypes
        host, filepart = splithost(url)
        localname = url2pathname(filepart)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = filepart
            if filepart[:1] == '/':
                urlfile = 'file://' + filepart
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = filepart
            if filepart[:1] == '/':
                urlfile = 'file://' + filepart
            elif filepart[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by (user, host IP,
        port, directory path).  The transfer type is 'D' when the URL
        names no file and 'I' otherwise, unless overridden by a ';type='
        attribute with one of a, i or d.  Errors from ftplib are re-raised
        as URLError.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, filename = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a
            # snapshot of the keys: deleting from a dict while iterating
            # over it raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not filename: transfer_type = 'D'
            else: transfer_type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(filename,
                                                        transfer_type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL.

        Returns an addinfourl over a StringIO holding the generated
        header lines followed by the decoded payload.  Raises OSError
        when the URL contains no comma.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without "=" is the encoding, not a parameter.
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002022
2023
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Initialise the base opener plus the credential cache and the
        redirect counter (at most ``maxtries`` chained redirects)."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many chained redirects: report it as a 500 error.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the 'location' (or 'uri') header of a redirect response.

        Returns None when neither header is present.  Raises HTTPError
        when the target scheme is not http, https, ftp or empty.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _handle_basic_auth_challenge(self, challenge_header, retry_prefix,
                                     url, fp, errcode, errmsg, headers,
                                     data, retry):
        """Shared body of http_error_401 and http_error_407.

        Raises HTTPError (through URLopener.http_error_default) unless
        the challenge header exists, parses, names the Basic scheme and
        retry is true; otherwise dispatches to the method named
        retry_prefix + self.type + '_basic_auth'.
        """
        if challenge_header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers[challenge_header]
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = retry_prefix + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._handle_basic_auth_challenge(
            'www-authenticate', 'retry_',
            url, fp, errcode, errmsg, headers, data, retry)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._handle_basic_auth_challenge(
            'proxy-authenticate', 'retry_proxy_',
            url, fp, errcode, errmsg, headers, data, retry)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Re-issue a request after storing credentials in the proxy URL
        configured for *scheme*; returns None when no credentials are
        available."""
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # get_user_passwd() then discards its cached entry for this realm.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        # Either value may be None when only the other one was supplied.
        proxyhost = "%s:%s@%s" % (quote(user or '', safe=''),
                                  quote(passwd or '', safe=''), proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Re-issue a request with credentials embedded in the host part
        of the URL; returns None when no credentials are available."""
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials;
        # get_user_passwd() then discards its cached entry for this realm.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        # Either value may be None when only the other one was supplied.
        host = "%s:%s@%s" % (quote(user or '', safe=''),
                             quote(passwd or '', safe=''), host)
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request through the proxy with Basic credentials."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request through the proxy with Basic credentials."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with Basic credentials."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with Basic credentials."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm at host.

        Cached credentials are returned unless clear_cache is true, in
        which case the cached entry is dropped and prompt_user_passwd()
        is asked again.  A non-empty answer is stored in the cache.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2232
2233
2234# Utility functions
2235
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once; the result is kept in the module-level
    _localhost variable and reused by later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2243
_thishost = None
def thishost():
    """Return the IP addresses of the current host as a tuple of strings.

    The addresses are resolved once and cached in the module-level
    _thishost variable.  If the machine's own host name cannot be
    resolved (socket.gaierror), the addresses of 'localhost' are used
    instead so that callers are not broken by a misconfigured host name.
    """
    global _thishost
    if _thishost is None:
        try:
            _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
        except socket.gaierror:
            # Host name is not resolvable; fall back to the loopback name.
            _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    return _thishost
2251
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    This is ftplib.all_errors; ftplib is imported lazily on first use
    and the value is cached in the module-level _ftperrors variable.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    from ftplib import all_errors
    _ftperrors = all_errors
    return _ftperrors
2260
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The object is created once from an empty string and cached in the
    module-level _noheaders variable, so every call returns the same
    instance.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2268
2269
2270# Utility classes
2271
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    An instance owns one ftplib.FTP control connection.  Every file
    object handed out by retrfile() bumps ``refcount``; the control
    connection is really closed only once close() has been requested
    (``keepalive`` is False) and no handed-out file is still open.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects returned by retrfile() and not yet closed.
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """Connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dirname in self.dirs:
            self.ftp.cwd(dirname)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing).

        *type* is the FTP transfer type letter; 'd' or 'D' requests a
        directory listing.  If RETR is refused with a 550 reply, the
        name is retried as a directory listing.

        Returns a ``(fileobj, length)`` tuple where *length* is the
        second item returned by ``ntransfercmd``.  Closing *fileobj*
        calls file_close().

        Raises URLError when the server rejects the request with a
        permanent error other than 550, or when the directory to list
        cannot be entered.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection is unusable; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 may mean "this is a directory": fall through to
                # the listing attempt below.  Anything else is fatal.
                if str(reason)[:3] != '550':
                    # %r, not %d: *reason* is an exception object and
                    # formatting it with %d raised TypeError.
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # The file object created by makefile() is used from here on;
        # the socket object itself is no longer needed.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy with a transfer."""
        self.busy = 0

    def close(self):
        """Request shutdown; closes now only if no file is outstanding."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for files returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP control connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2361
2362# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but an all-lowercase
    "_proxy" variable always takes precedence over a differently-cased
    one for the same scheme; an empty lowercase variable removes the
    scheme.  When REQUEST_METHOD is set (a CGI environment), a
    non-lowercase HTTP_PROXY is ignored because a client can set it via
    a "Proxy:" request header.
    """
    proxies = {}
    # First pass: accept any capitalisation.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110: under CGI, HTTP_PROXY may come from the client.
    # A lowercase http_proxy is re-added by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase variables override whatever was found above,
    # making the result independent of environment iteration order.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2378
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.

    *host* may carry a ":port" suffix.  Matching is case-insensitive and
    respects domain boundaries: the entry "example.com" (or
    ".example.com") matches "example.com" and "www.example.com" but not
    "badexample.com".  Returns 1 to bypass the proxy and 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    host = host.lower()
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # Normalise the entry: surrounding blanks, case, leading dots.
        name = name.strip().lower().lstrip('.')
        if not name:
            continue
        if hostonly == name or host == name:
            return 1
        # Only match whole labels, i.e. at a '.' boundary.
        suffix = '.' + name
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
2398
2399
Ronald Oussorene72e1612011-03-14 18:15:25 -04002400# This code tests an OSX specific data structure but is testable on all
2401# platforms
2402def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2403 """
2404 Return True iff this host shouldn't be accessed using a proxy
2405
2406 This function uses the MacOSX framework SystemConfiguration
2407 to fetch the proxy information.
2408
2409 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2410 { 'exclude_simple': bool,
2411 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2412 }
2413 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002414 from fnmatch import fnmatch
2415
2416 hostonly, port = splitport(host)
2417
2418 def ip2num(ipAddr):
2419 parts = ipAddr.split('.')
2420 parts = list(map(int, parts))
2421 if len(parts) != 4:
2422 parts = (parts + [0, 0, 0, 0])[:4]
2423 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2424
2425 # Check for simple host names:
2426 if '.' not in host:
2427 if proxy_settings['exclude_simple']:
2428 return True
2429
2430 hostIP = None
2431
2432 for value in proxy_settings.get('exceptions', ()):
2433 # Items in the list are strings like these: *.local, 169.254/16
2434 if not value: continue
2435
2436 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2437 if m is not None:
2438 if hostIP is None:
2439 try:
2440 hostIP = socket.gethostbyname(hostonly)
2441 hostIP = ip2num(hostIP)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002442 except OSError:
Ronald Oussorene72e1612011-03-14 18:15:25 -04002443 continue
2444
2445 base = ip2num(m.group(1))
2446 mask = m.group(2)
2447 if mask is None:
2448 mask = 8 * (m.group(1).count('.') + 1)
2449 else:
2450 mask = int(mask[1:])
2451 mask = 32 - mask
2452
2453 if (hostIP >> mask) == (base >> mask):
2454 return True
2455
2456 elif fnmatch(host, value):
2457 return True
2458
2459 return False
2460
2461
# Platform-specific definitions of getproxies() and proxy_bypass().
# In every branch, proxy settings found in the environment take
# precedence over the platform's own configuration store.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True iff *host* should bypass the proxy per the
        settings returned by _scproxy._get_proxy_settings()."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return a true value if *host* should not be accessed via a proxy.

        Uses the environment rules when any proxy is configured in the
        environment, otherwise the SystemConfiguration settings.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from SystemConfiguration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty (or partially filled) dictionary is returned when the
        registry cannot be read or holds a value in an unexpected format.
        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        settings_path = (
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            # The with-block closes the key handle even if a query fails.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                settings_path) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if *host* matches the registry's ProxyOverride list.

        Returns 0 when the registry cannot be read, the proxy is
        disabled, the override list is empty, or nothing matches.
        *host* may carry a ":port" suffix.  Its resolved IP address and
        fully qualified name are also tried against each pattern.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        settings_path = (
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        try:
            # The with-block guarantees the key handle is closed.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                settings_path) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # The registry entry is a ';'-separated list of glob patterns;
        # the special entry '<local>' stands for any dot-less host name.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # NOTE(review): re.match anchors only at the start, so a
                # pattern also matches hosts that merely begin with it.
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a true value if *host* should not be accessed via a proxy.

        Uses the environment rules when any proxy is configured in the
        environment, otherwise the registry's ProxyOverride setting.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment