blob: e6abf34fa071803a3b826f4876ef0e64d569a2dd [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below). It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work. Each Handler implements a particular protocol or
option. The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL. For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns. The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as original
urllib. Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back. One difference is that you can also pass
a Request instance instead of a URL. Raises a URLError (subclass of
OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers. Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.

objects of interest:

OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
the Handler classes, while dealing with requests and responses.

Request -- An object that encapsulates the state of a request. The
state can be as simple as the URL. It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""
69
# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled? The
# client needs to know the HTTP error code. But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
94import sys
95import time
Senthil Kumaran7bc0d872010-12-19 10:49:52 +000096import collections
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105 splittype, splithost, splitport, splituser, splitpasswd,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100106 splitattr, splitquery, splitvalue, splittag, to_bytes,
107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
110# check for SSL
111try:
112 import ssl
Brett Cannoncd171c82013-07-04 17:43:24 -0400113except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000114 _have_ssl = False
115else:
116 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000117
# Public API of this module, grouped by kind.
__all__ = [
    # Classes
    'Request',
    'OpenerDirector',
    'BaseHandler',
    'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler',
    'HTTPCookieProcessor',
    'ProxyHandler',
    'HTTPPasswordMgr',
    'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth',
    'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler',
    'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler',
    'ProxyDigestAuthHandler',
    'HTTPHandler',
    'FileHandler',
    'FTPHandler',
    'CacheFTPHandler',
    'DataHandler',
    'UnknownHandler',
    'HTTPErrorProcessor',
    # Functions
    'urlopen',
    'install_opener',
    'build_opener',
    'pathname2url',
    'url2pathname',
    'getproxies',
    # Legacy interface
    'urlretrieve',
    'urlcleanup',
    'URLopener',
    'FancyURLopener',
]
134
# Used in the User-Agent header sent with requests.
# Built from sys.version_info instead of slicing sys.version: the slice
# sys.version[:3] truncates two-digit minor versions ("3.10" -> "3.1").
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener: None until urlopen() builds one or
# install_opener() sets one.
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* and return whatever the selected opener's open() returns.

    *url* is either a URL string or a Request object.

    *data*, when given, must be a bytes object carrying extra payload for
    the server; None means there is nothing to send. An iterable is also
    accepted, in which case the Content-Length header has to be supplied
    explicitly. At present only HTTP requests make use of data, and
    supplying it turns the request into a POST rather than a GET.

    The payload is expected in application/x-www-form-urlencoded form.
    urllib.parse.urlencode() builds such a string from a mapping or a
    sequence of 2-tuples; encode the result to bytes before passing it
    here. The charset parameter of the Content-Type header can announce
    the encoding used; without it, a server following the HTTP 1.1
    recommendation may assume ISO-8859-1, so sending the charset along
    with the Request is advisable.

    The module speaks HTTP/1.1 and adds a "Connection:close" header to its
    HTTP requests.

    *timeout* is a number of seconds applied to blocking operations such
    as the connection attempt; when omitted the global default timeout is
    used. It only has an effect for HTTP, HTTPS and FTP connections.

    *context*, if given, must be an ssl.SSLContext describing the SSL
    options (see HTTPSConnection). It cannot be combined with *cafile*,
    *capath* or *cadefault*.

    *cafile* and *capath* name trusted CA certificates for HTTPS requests:
    cafile is a single bundle file, capath a directory of hashed
    certificate files (see ssl.SSLContext.load_verify_locations()).

    *cadefault* does not select any certificates. A true value still makes
    this call use a dedicated opener built around a default SSL context.

    For http and https URLs the result is an http.client.HTTPResponse.
    For ftp, file and data URLs, and for requests handled by the legacy
    URLopener / FancyURLopener classes, it is a urllib.response.addinfourl
    object, usable as a context manager, offering among others:

    * geturl() -- the URL of the resource retrieved, commonly used to
      find out whether a redirect was followed
    * info() -- the meta-information of the page, such as headers
    * getcode() -- the HTTP status code of the response

    *None* may be returned if no handler handles the request, although the
    default global OpenerDirector installs UnknownHandler so that this
    does not happen. When proxy settings are detected (for instance an
    environment variable such as http_proxy), ProxyHandler is installed by
    default and routes the requests through the proxy.

    Raises ValueError when *context* is combined with the CA arguments, or
    when CA arguments are given but SSL support is unavailable.
    """
    global _opener
    wants_custom_ca = cafile or capath or cadefault

    if wants_custom_ca and context is not None:
        raise ValueError(
            "You can't pass both context and any of cafile, capath, and "
            "cadefault"
        )

    if wants_custom_ca:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # One-off opener: the shared default opener is left untouched.
        opener = build_opener(HTTPSHandler(context=context))
    elif context:
        opener = build_opener(HTTPSHandler(context=context))
    else:
        # Lazily create the shared default opener on first use.
        if _opener is None:
            _opener = build_opener()
        opener = _opener

    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000228
def install_opener(opener):
    """Install *opener* as the module-wide default opener.

    The object is stored in the module global that urlopen() falls back to
    when it is not asked to build a dedicated SSL-aware opener.
    """
    global _opener
    _opener = opener
232
# Paths of temporary files created by urlretrieve(); urlcleanup() removes them.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Download the resource at *url* into a file on disk.

    url        -- the URL to fetch (required).
    filename   -- destination path; when omitted a named temporary file is
                  created and remembered so urlcleanup() can delete it.
    reporthook -- optional callable invoked as
                  reporthook(block_number, block_size, total_size) once
                  before reading and once after every block written.
                  total_size is -1 when no Content-Length header is present.
    data       -- optional payload handed on to urlopen(); it should be
                  valid URL-encoded data.

    For a file: URL with no *filename*, nothing is copied: the normalised
    local path and the headers are returned directly. With a *filename*
    the local file is copied to the new file.

    Returns a (path, headers) tuple. Raises ContentTooShortError when fewer
    bytes were read than the Content-Length header announced.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Local file and no explicit target: hand back the path itself,
        # there is no point in copying.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file: caller-supplied path or a tracked tempfile.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            chunk_size = 1024*8
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            else:
                size = -1

            if reporthook:
                reporthook(blocknum, chunk_size, size)

            block = fp.read(chunk_size)
            while block:
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, chunk_size, size)
                block = fp.read(chunk_size)

    # Checked after both files are closed so the partial file is flushed.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000296
def urlcleanup():
    """Delete the temporary files left by urlretrieve() and drop the opener.

    Every path recorded in the module's temp-file list is unlinked; files
    that cannot be removed (OSError) are skipped silently. The list is
    then emptied and the module-wide default opener, if one is set, is
    reset to None.
    """
    global _opener

    for stale_path in _url_tempfiles:
        # Best effort: a file that is already gone is not an error here.
        with contextlib.suppress(OSError):
            os.unlink(stale_path)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
309
# copied from cookielib.py
# Matches a trailing ":<digits>" port suffix on a host string.
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    The host is taken from the network-location part of request.full_url;
    if that part is empty, the request's "Host" header is used instead
    (an empty string when the header is absent). A trailing port number is
    stripped.

    Variation from the RFC: the result is lowercased so that callers can
    compare hosts directly.
    """
    netloc = urlparse(request.full_url).netloc
    if netloc == "":
        netloc = request.get_header("Host", "")

    # Drop ":port" if present, then normalise the case.
    return _cut_port_re.sub("", netloc, 1).lower()
327
class Request:
    """State of a single URL request: URL, payload, headers and method.

    Attributes set up by the constructor and the full_url setter:

    full_url / fragment -- the URL (property) and its "#fragment" part
    type, host, selector -- scheme, unquoted host and the remainder of the
                            URL, filled in by _parse()
    headers             -- headers that are carried over to redirects
    unredirected_hdrs   -- headers that are not added to redirected requests
    data                -- request payload (property)
    origin_req_host     -- host of the originating request
    unverifiable        -- flag stored as given by the caller
    method              -- only set when an explicit method was supplied
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Build a request for *url*.

        *headers* is read, never modified; each key is normalised through
        add_header(). When *origin_req_host* is None it is derived from the
        request itself with request_host(). Raises ValueError (via the
        full_url setter) if the URL has no scheme.
        """
        self.full_url = url
        # Header tables must exist before the data setter runs, because it
        # may need to drop a stale Content-length header.
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        # Leave the attribute unset otherwise so get_method() can fall
        # back to a class-level "method" or to the GET/POST default.
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL, with the fragment re-attached when there is one."""
        if not self.fragment:
            return self._full_url
        return '{}#{}'.format(self._full_url, self.fragment)

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        bare_url = unwrap(url)
        self._full_url, self.fragment = splittag(bare_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request payload, or None."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # A Content-length header was most probably computed for the
            # previous payload, so it must not survive a change of data.
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # Route through the setter so the header clean-up still happens.
        self.data = None

    def _parse(self):
        """Split the stored URL into type, host and selector.

        Raises ValueError when the URL carries no scheme.
        """
        self.type, remainder = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(remainder)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        fallback = "GET" if self.data is None else "POST"
        return getattr(self, 'method', fallback)

    def get_full_url(self):
        """Return the full_url property value."""
        return self.full_url

    def set_proxy(self, host, type):
        """Point the request at proxy *host* using scheme *type*.

        An https request without a tunnel host yet keeps its type and
        selector and records the original host as the tunnel host. Any
        other request switches to *type* and uses the full URL as its
        selector. In both cases *host* becomes the request host.
        """
        needs_tunnel = self.type == 'https' and not self._tunnel_host
        if needs_tunnel:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Store a header under its capitalize()d name."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is present in either header table.

        The name is looked up exactly as given (no case normalisation).
        """
        tables = (self.headers, self.unredirected_hdrs)
        return any(header_name in table for table in tables)

    def get_header(self, header_name, default=None):
        """Return a header value, preferring the regular header table."""
        fallback = self.unredirected_hdrs.get(header_name, default)
        return self.headers.get(header_name, fallback)

    def remove_header(self, header_name):
        """Remove *header_name* from both header tables, if present."""
        for table in (self.headers, self.unredirected_hdrs):
            table.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers override unredirected ones with the same name.
        """
        merged = self.unredirected_hdrs.copy()
        merged.update(self.headers)
        return list(merged.items())
437
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are indexed by inspecting their method names, which follow
    the pattern "<protocol>_<condition>":

    <protocol>_open           -- handle_open[protocol]
    <protocol>_request        -- process_request[protocol]
    <protocol>_response       -- process_response[protocol]
    <protocol>_error[_<kind>] -- handle_error[protocol][kind]
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every hook its method names declare.

        Raises TypeError if the object has no add_parent attribute. A
        handler that declares no recognised hook is ignored entirely.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        # Names that merely look like "<protocol>_<condition>" hooks.
        coincidental = ("redirect_request", "do_open", "proxy_open")
        # Conditions that index directly by protocol.
        by_condition = {
            "open": self.handle_open,
            "response": self.process_response,
            "request": self.process_request,
        }

        registered = False
        for attr_name in dir(handler):
            if attr_name in coincidental:
                continue

            split_at = attr_name.find("_")
            protocol = attr_name[:split_at]
            condition = attr_name[split_at+1:]

            if condition.startswith("error"):
                # "error_302" -> 302, "error_default" -> "default".
                kind = condition[condition.find("_") + 1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition in by_condition:
                kind = protocol
                lookup = by_condition[condition]
            else:
                continue

            # Keeps each chain sorted according to the handlers' ordering.
            bisect.insort(lookup.setdefault(kind, []), handler)
            registered = True

        if registered:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in chain[kind] until one answers.

        Handlers raise an exception if no one else should try to handle
        the request, or return None if they can't but another handler
        could. Otherwise, they return the response. Returns None when the
        chain is empty or every handler declined.
        """
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request is passed through the "<protocol>_request" processors,
        opened, and the response passed through the "<protocol>_response"
        processors registered for the request's protocol.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        request_hook = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains.

        The first truthy result wins; otherwise the result of the
        'unknown' chain is returned as is.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if not result:
            # Read the type only now, after the default chain has run.
            protocol = req.type
            result = self._call_chain(self.handle_open, protocol,
                                      protocol + '_open', req)
        if not result:
            result = self._call_chain(self.handle_open, 'unknown',
                                      'unknown_open', req)
        return result

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http' and 'https' the third positional argument is used as
        the kind, the method called is "http_error_<kind>", and the
        "http_error_default" chain is tried when nothing truthy comes
        back. Other protocols use "<proto>_error" with no fallback.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
        else:
            table = self.handle_error
            meth_name = proto + '_error'

        result = self._call_chain(table, proto, meth_name, *args)
        if result:
            return result

        if is_http:
            return self._call_chain(table, 'default', 'http_error_default',
                                    *args)
575
576# XXX probably also want an abstract factory that knows when it makes
577# sense to skip a superclass in favor of a subclass and when it might
578# make sense to include both
579
def build_opener(*handlers):
    """Create an OpenerDirector populated with default and custom handlers.

    *handlers* may contain handler instances or handler classes; classes
    are instantiated without arguments.

    The opener starts from a set of default handlers (HTTP, FTP, file,
    data, proxy, redirect and error handling, plus HTTPS when http.client
    provides HTTPSConnection). A default handler class is left out when
    one of the supplied handlers is an instance or a subclass of it.
    Defaults are added first, then the supplied handlers in the order
    given.
    """
    def replaces(candidate, default_class):
        # A class overrides by subclassing, an instance by isinstance.
        if isinstance(candidate, type):
            return issubclass(candidate, default_class)
        return isinstance(candidate, default_class)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    kept_defaults = [
        default_class for default_class in default_classes
        if not any(replaces(candidate, default_class)
                   for candidate in handlers)
    ]

    for default_class in kept_defaults:
        opener.add_handler(default_class())

    for candidate in handlers:
        instance = candidate() if isinstance(candidate, type) else candidate
        opener.add_handler(instance)
    return opener
615
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Sort key used by __lt__; lower values sort first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as the object this handler is attached to."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order.

        An object without a handler_order attribute always sorts after
        this handler. This tries to preserve the old behavior of having
        custom classes inserted after default ones (works only for custom
        user classes which are not aware of handler_order).
        """
        try:
            other_order = other.handler_order
        except AttributeError:
            return True
        return self.handler_order < other_order
633
634
class HTTPErrorProcessor(BaseHandler):
    """Turn non-2xx HTTP responses into error dispatches."""

    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes.

        Any other status is handed to self.parent.error('http', ...) and
        whatever that call returns is returned instead.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
651
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the response."""
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000655
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, guarding against loops."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.

        The new request carries over the original headers except
        Content-Length and Content-Type, and no data.
        """
        method = req.get_method()
        safe_redirect = (code in (301, 302, 303, 307)
                         and method in ("GET", "HEAD"))
        post_redirect = code in (301, 302, 303) and method == "POST"
        if not (safe_redirect or post_redirect):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {name: value for name, value in req.headers.items()
                      if name.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen. Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines. Raises HTTPError for a target scheme
        other than http, https, ftp or empty, and when the per-URL or
        total redirect limit is reached.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        for header_name in ("location", "uri"):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.
        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            repeated_too_often = visited.get(newurl, 0) >= self.max_repeats
            if repeated_too_often or len(visited) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
758
759
def _parse_proxy(proxy):
    """Split a proxy specification into (scheme, user, password, host/port).

    *proxy* is either a URL or a bare authority.  A URL must carry an
    authority (host:port) component which, per RFC 3986, means two
    slashes follow the scheme.  For a bare authority the returned scheme
    is None; user and password are None when no userinfo is present.

    Raises ValueError when the part after the scheme starts with a
    single slash, i.e. the URL has no authority.
    """
    scheme, rest = splittype(proxy)
    if rest.startswith("/"):
        # Looks like a URL: insist on the "//" that introduces an authority.
        if not rest.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # With an authority present, RFC 3986 (ss 3. and 3.3.) says the
        # path is empty or starts with '/', so the authority ends at the
        # first slash after the leading "//" (or at the end of the string).
        path_start = rest.find("/", 2)
        if path_start == -1:
            authority = rest[2:]
        else:
            authority = rest[2:path_start]
    else:
        # Bare authority: whatever splittype() took for a scheme is not one.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
788
class ProxyHandler(BaseHandler):
    """Redirect requests to a proxy chosen by URL scheme.

    For every ``scheme -> proxy URL`` entry in *proxies* an instance
    attribute ``<scheme>_open`` is created that forwards the request to
    proxy_open() together with that proxy and scheme.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one ``<scheme>_open`` callable per entry of *proxies*.

        When *proxies* is None the mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Default arguments freeze the per-iteration values; a plain
            # closure would see only the last pair of the loop.
            opener = (lambda r, proxy=proxy_url, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; re-dispatch if the proxy scheme differs.

        Returns None when the host is bypassed or when the remaining
        handlers can process the (now proxied) request themselves;
        otherwise returns the result of re-opening the request through
        the parent opener.
        """
        request_scheme = req.type
        proxy_scheme, user, password, hostport = _parse_proxy(proxy)
        if proxy_scheme is None:
            proxy_scheme = request_scheme

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            raw = '%s:%s' % (unquote(user), unquote(password))
            token = base64.b64encode(raw.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + token)
        req.set_proxy(unquote(hostport), proxy_scheme)

        if request_scheme != proxy_scheme and request_scheme != 'https':
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
        # let other handlers take care of it
        return None
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000830
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix.

    ``self.passwd`` maps a realm to a dict whose keys are tuples of
    reduced URIs (see reduce_uri) and whose values are
    ``(user, passwd)`` tuples.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* and one or more URIs.

        *uri* may be a single string or a sequence of strings.  Each URI
        is stored twice: once with the scheme's default port made
        explicit and once exactly as given, so later lookups match
        either spelling.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for *realm* covering *authuri*.

        Returns ``(None, None)`` when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the well-known port explicit so that "host" and
            # "host:80" compare equal for http (443 for https).
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (authority, path)
        tuples as returned by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A plain character-wise common
        # prefix would treat "/foobar" as being below "/foo" and hand the
        # credentials for one resource to an unrelated sibling.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
893
894
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm ``None``.

    Credentials registered under realm None act as a default that is
    consulted whenever the requested realm yields no user.
    """

    def find_user_password(self, realm, authuri):
        """Look up *realm* first, then the default (None) realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing stored for this specific realm: try the default one.
            return lookup(self, None, authuri)
        return user, password
903
904
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records an "authenticated" flag per URI.

    ``self.authenticated`` maps a reduced URI (see reduce_uri) to the
    flag most recently recorded for it.
    """

    def __init__(self, *args, **kwargs):
        # Set before delegating so the attribute exists whatever the
        # base initialiser does.
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Store credentials and record the authenticated flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        store = super().add_password
        # Add a default for prior auth requests
        if realm is not None:
            store(None, uri, user, passwd)
        store(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Record *is_authenticated* for one URI or a sequence of URIs."""
        # uri could be a single URI or a sequence
        uris = [uri] if isinstance(uri, str) else uri

        for default_port in (True, False):
            for item in uris:
                key = self.reduce_uri(item, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag of the first recorded URI covering *authuri*.

        Returns None when no recorded URI covers it.
        """
        for default_port in (True, False):
            candidate = self.reduce_uri(authuri, default_port)
            for known, flag in self.authenticated.items():
                if self.is_suburi(known, candidate):
                    return flag
        return None
934
935
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Subclasses are expected to provide ``auth_header`` (the request
    header that carries the credentials); ``self.parent`` is used to
    re-open the request and is assumed to be set by the opener
    machinery outside this class.
    """

    # XXX this allows for multiple auth-schemes, but only the first
    # "<scheme> realm=..." challenge found in the header is considered.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The pattern is anchored at the start of the string or at a comma
    # and the scheme token may not contain a comma.  The previous
    # '(?:.*,)*' prefix nested two unbounded quantifiers and backtracked
    # exponentially on headers made of many commas (CVE-2020-8492).
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespaces
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespaces
                    # realm=xxx, realm='xxx' or realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr*, or a fresh HTTPPasswordMgr when None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in header *authreq* of *headers*.

        Returns the retried response, or None when there is no usable
        challenge or no stored password.  Raises ValueError when the
        header's first token names a scheme other than Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)

            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if quote not in ['"', "'"]:
                    warnings.warn("Basic Auth Realm was unquoted",
                                  UserWarning, 2)
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with Basic credentials for *realm* and *host*.

        Returns None when no password is stored, or when the request
        already carries exactly these credentials (so retrying would
        only repeat the failure).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Attach stored credentials up front for URLs flagged authenticated.

        Only active when the password manager offers ``is_authenticated``.
        """
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            # http_response() flags every URL that answered 2xx, including
            # ones nobody registered credentials for; without this guard
            # such requests would be sent the literal pair "None:None".
            if user is not None:
                credentials = '{0}:{1}'.format(user, passwd).encode()
                auth_str = base64.standard_b64encode(credentials).decode()
                req.add_unredirected_header(
                    'Authorization', 'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether the request's URL answered with a 2xx status."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1013
1014
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001015
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials for the request URL."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 via the ``www-authenticate`` challenge header.

        The full request URL is what gets looked up in the password
        manager.
        """
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001025
1026
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 via the ``proxy-authenticate`` challenge header."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001040
1041
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001042# Return n random bytes.
1043_randombytes = os.urandom
1044
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001045
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Subclasses are expected to provide ``auth_header``; ``self.parent``
    is used to re-open the request and is assumed to be set by the
    opener machinery outside this class.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in header *auth_header*.

        Returns the retried response for a Digest challenge and None for
        a missing or Basic challenge.  Raises HTTPError after more than
        five consecutive attempts and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with an answer to the challenge header value *auth*.

        Returns None when no answer can be computed or when the request
        already carries exactly this answer.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for the parsed challenge *chal*.

        Returns None when the challenge lacks ``realm`` or ``nonce``, when
        the algorithm is unsupported, or when no user is stored for the
        realm.  Raises URLError for a qop other than ``auth``.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # The nonce count restarts whenever the server issues a new
            # nonce and increases while the same nonce is reused.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        H hashes an ASCII string to a hex digest and KD(s, d) is
        H("s:d").  ``(None, None)`` is returned for an algorithm other
        than 'MD5' or 'SHA'; get_authorization() tests for that and
        gives up instead of failing on an unbound local.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1185
1186
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 challenge, then clear the retry counter."""
        # Element 1 of the parse result is the network location.
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
1202 return retry
1203
1204
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from a proxy using Digest authentication."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 challenge, then clear the retry counter."""
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1215 return retry
1216
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and transport for HTTP-style handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers a request needs before it is sent.

        Adds Content-type and Content-length for requests with a body,
        a Host header, and the parent opener's default headers, without
        overriding headers already present.  Returns *request*.

        Raises URLError when the request has no host, TypeError when
        the body is a str, and ValueError when the body is an iterable
        whose length cannot be determined and no Content-length was
        supplied.
        """
        # collections.Iterable was an alias removed in Python 3.10; the
        # ABC lives in collections.abc.  Imported locally so this method
        # does not depend on what the module header imports.
        from collections.abc import Iterable

        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    mv = memoryview(data)
                except TypeError:
                    # Not a buffer: its size is unknown up front.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    # Byte length of the buffer, whatever its item size.
                    request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL; the Host
            # header must name the host taken from that URL.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.

        Raises URLError when the request has no host or when sending the
        request fails with an OSError.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level given to __init__/set_http_debuglevel; it was
        # previously stored but never handed to the connection.
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over regular ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Whatever went wrong, do not leak the connection; the
            # exception itself is re-raised unchanged.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # urllib clients expect the response to have the reason phrase
        # in .msg, so overwrite the response's .msg attribute with it.
        # It would be good to mark this attribute as deprecated and get
        # clients to use info() or .headers instead.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001328
1329
class HTTPHandler(AbstractHTTPHandler):
    """Open ``http`` URLs over http.client.HTTPConnection."""

    # Request preparation is the shared implementation from the base class.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* and return the response produced by do_open()."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1336
# HTTPS support is optional: only define and export the handler when
# http.client provides HTTPSConnection.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open ``https`` URLs over http.client.HTTPSConnection."""

        # Request preparation is the shared implementation from the
        # base class.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            """Remember the options handed to every HTTPS connection."""
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req*, passing the stored context and check_hostname."""
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001353
class HTTPCookieProcessor(BaseHandler):
    """Add cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when None."""
        import http.cookiejar
        # Test identity, not truthiness: a caller-supplied jar must be
        # kept even if it would evaluate as false.
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1371
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming the request's type."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001376
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict.  A value enclosed in double quotes has the quotes
    removed; an empty value (``key=``) is stored as the empty string.
    Raises ValueError for an element that contains no ``=``.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexing so that an empty value does not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1386
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits *s* at commas, except for commas inside a double-quoted
    string.  Inside quotes a backslash is dropped and the character
    after it is taken literally; outside quotes a backslash is an
    ordinary character.  Quotes may also open in the middle of an
    element, and only double quotes count, not single quotes.  Every
    element is returned with surrounding whitespace stripped.
    """
    elements = []
    current = []
    in_quotes = False

    chars = iter(s)
    for ch in chars:
        if in_quotes:
            if ch == '\\':
                # Drop the backslash and keep the next character as is
                # (nothing is kept when the input ends on the backslash).
                escaped = next(chars, None)
                if escaped is not None:
                    current.append(escaped)
                continue
            if ch == '"':
                in_quotes = False
            current.append(ch)
        elif ch == ',':
            # An unquoted comma ends the element, even an empty one.
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
1429
class FileHandler(BaseHandler):
    """Open ``file`` URLs that refer to files on the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* as a local file.

        When the selector itself starts with exactly two slashes and the
        request names a host other than 'localhost', URLError is raised
        unless that host is listed by get_names(); if it is listed,
        nothing is opened and None is returned.
        """
        selector = req.selector
        names_other_host = (selector[:2] == '//' and selector[2:3] != '/'
                            and req.host and req.host != 'localhost')
        if not names_other_host:
            return self.open_local_file(req)
        if req.host not in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")
        return None

    # names for the localhost
    names = None

    def get_names(self):
        """Return the tuple of addresses counted as this machine.

        Computed once and cached on the class.
        """
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            loopback = socket.gethostbyname_ex('localhost')[2]
            own = socket.gethostbyname_ex(socket.gethostname())[2]
            FileHandler.names = tuple(loopback + own)
        except socket.gaierror:
            FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file named by the request selector.

        The response carries Content-type, Content-length and
        Last-modified headers.  Raises URLError when the file cannot be
        accessed or when the request's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        selector = req.selector
        local_path = url2pathname(selector)
        try:
            info = os.stat(local_path)
            size = info.st_size
            modified = email.utils.formatdate(info.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(selector)[0]
            header_text = (
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            headers = email.message_from_string(header_text)
            if host:
                host, port = splitport(host)
            # No host at all, or a port-less host resolving to one of
            # this machine's addresses, counts as local.
            is_local = (not host or
                        (not port and
                         _safe_gethostbyname(host) in self.get_names()))
            if is_local:
                if host:
                    origurl = 'file://' + host + selector
                else:
                    origurl = 'file://' + selector
                return addinfourl(open(local_path, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001481
def _safe_gethostbyname(host):
    """Return socket.gethostbyname(host), or None on socket.gaierror."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        address = None
    return address
1487
class FTPHandler(BaseHandler):
    """Handler that serves ftp: requests through an ftpwrapper connection."""

    def ftp_open(self, req):
        """Open the FTP resource described by *req*.

        Reads req.host, req.selector, req.timeout and req.full_url.
        Returns an addinfourl wrapping the data stream, with Content-type
        and Content-length headers when they can be determined.

        Raises URLError when no host is given, when the host name cannot
        be resolved, or when ftplib reports any of its errors.
        """
        import ftplib
        import mimetypes

        netloc = req.host
        if not netloc:
            raise URLError('ftp error: no host given')
        host, port = splitport(netloc)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        # Everything before the last '/' is the directory chain; the last
        # component is the file name (empty for a directory listing).
        *dirs, file = [unquote(part) for part in path.split('/')]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary transfer for a file, directory listing otherwise,
            # unless a ";type=" attribute overrides it.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new, non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001545
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a small timed cache.

    Connections are keyed by (user, host, port, directory path, timeout).
    Each use pushes the entry's expiry to now + self.delay; check_cache()
    closes and drops expired entries and trims the cache to stay below
    self.max_conns.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}       # key -> cached connection
        self.timeout = {}     # key -> expiry timestamp (time.time() based)
        self.soonest = 0      # smallest expiry in self.timeout, 0 if empty
        self.delay = 60       # seconds an entry stays valid after last use
        self.max_conns = 16   # size at which the oldest entry is evicted

    def setTimeout(self, t):
        """Set the number of seconds a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which eviction starts."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for this endpoint, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def _soonest_expiry(self):
        """Return the smallest expiry in the cache, or 0 when it is empty."""
        # min() raises ValueError on an empty sequence, so guard explicitly.
        return min(self.timeout.values()) if self.timeout else 0

    def check_cache(self):
        """Close and drop expired connections, then trim the cache size."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            for key, expiry in list(self.timeout.items()):
                if expiry < now:
                    self.cache[key].close()
                    del self.cache[key]
                    del self.timeout[key]
        self.soonest = self._soonest_expiry()

        # then check the size.  The last remaining entry is never evicted:
        # it is the one connect_ftp() is about to hand back.
        while (self.timeout and len(self.cache) > 1
               and len(self.cache) >= self.max_conns):
            oldest = min(self.timeout, key=self.timeout.get)
            # Close the evicted connection, as the expiry path above does.
            self.cache[oldest].close()
            del self.cache[oldest]
            del self.timeout[oldest]
            self.soonest = self._soonest_expiry()

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1598
class DataHandler(BaseHandler):
    """Handler for data: URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the data URL in req.full_url and return it as an addinfourl.

        POSTed data is ignored.  Syntax:

            dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            mediatype := [ type "/" subtype ] *( ";" parameter )
            data      := *urlchar
            parameter := attribute "=" value

        The response carries Content-type and Content-length headers; the
        media type defaults to "text/plain;charset=US-ASCII".
        """
        url = req.full_url

        _scheme, remainder = url.split(":", 1)
        mediatype, payload = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        raw = unquote_to_bytes(payload)
        base64_marker = ";base64"
        if mediatype.endswith(base64_marker):
            raw = base64.decodebytes(raw)
            mediatype = mediatype[:-len(base64_marker)]

        mediatype = mediatype or "text/plain;charset=US-ASCII"

        header_text = "Content-type: %s\nContent-length: %d\n" % (
            mediatype, len(raw))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(raw), headers, url)
1628
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001629
# Code moved from the old urllib module
1631
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Path <-> URL helpers: Windows gets the nturl2path implementations,
# every other platform uses plain percent-(un)quoting.
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL to a file system path.

        OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative 'file' scheme URL.

        OS-specific; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001647
1648# This really consists of two pieces:
1649# (1) a class which handles opening of all sorts of URLs
1650# (plus assorted utilities etc.)
1651# (2) a set of functions for parsing URLs
1652# XXX Should these be separated out into different modules?
1653
1654
1655ftpcache = {}
1656class URLopener:
1657 """Class to open URLs.
1658 This is a class rather than just a subroutine because we may need
1659 more than one set of global protocol-specific options.
1660 Note -- this is a base class for those who don't want the
1661 automatic handling of errors type 302 (relocated) and 401
1662 (authorization needed)."""
1663
1664 __tempfiles = None
1665
1666 version = "Python-urllib/%s" % __version__
1667
1668 # Constructor
1669 def __init__(self, proxies=None, **x509):
Georg Brandlfcbdbf22012-06-24 19:56:31 +02001670 msg = "%(class)s style of invoking requests is deprecated. " \
Senthil Kumaran38b968b92012-03-14 13:43:53 -07001671 "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
1672 warnings.warn(msg, DeprecationWarning, stacklevel=3)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001673 if proxies is None:
1674 proxies = getproxies()
1675 assert hasattr(proxies, 'keys'), "proxies must be a mapping"
1676 self.proxies = proxies
1677 self.key_file = x509.get('key_file')
1678 self.cert_file = x509.get('cert_file')
1679 self.addheaders = [('User-Agent', self.version)]
1680 self.__tempfiles = []
1681 self.__unlink = os.unlink # See cleanup()
1682 self.tempcache = None
1683 # Undocumented feature: if you assign {} to tempcache,
1684 # it is used to cache files retrieved with
1685 # self.retrieve(). This is not enabled by default
1686 # since it does not work for changing documents (and I
1687 # haven't got the logic to check expiration headers
1688 # yet).
1689 self.ftpcache = ftpcache
1690 # Undocumented feature: you can use a different
1691 # ftp cache by assigning to the .ftpcache member;
1692 # in case you want logically independent URL openers
1693 # XXX This is not threadsafe. Bah.
1694
    def __del__(self):
        """Run close() when the opener is garbage-collected."""
        self.close()
1697
    def close(self):
        """Release the opener's resources by delegating to cleanup()."""
        self.cleanup()
1700
1701 def cleanup(self):
1702 # This code sometimes runs when the rest of this module
1703 # has already been deleted, so it can't use any globals
1704 # or import anything.
1705 if self.__tempfiles:
1706 for file in self.__tempfiles:
1707 try:
1708 self.__unlink(file)
1709 except OSError:
1710 pass
1711 del self.__tempfiles[:]
1712 if self.tempcache:
1713 self.tempcache.clear()
1714
1715 def addheader(self, *args):
1716 """Add a header to be used by the HTTP interface only
1717 e.g. u.addheader('Accept', 'sound/basic')"""
1718 self.addheaders.append(args)
1719
1720 # External interface
1721 def open(self, fullurl, data=None):
1722 """Use URLopener().open(file) instead of open(file, 'r')."""
Georg Brandl13e89462008-07-01 19:56:00 +00001723 fullurl = unwrap(to_bytes(fullurl))
Senthil Kumaran734f0592010-02-20 22:19:04 +00001724 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001725 if self.tempcache and fullurl in self.tempcache:
1726 filename, headers = self.tempcache[fullurl]
1727 fp = open(filename, 'rb')
Georg Brandl13e89462008-07-01 19:56:00 +00001728 return addinfourl(fp, headers, fullurl)
1729 urltype, url = splittype(fullurl)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001730 if not urltype:
1731 urltype = 'file'
1732 if urltype in self.proxies:
1733 proxy = self.proxies[urltype]
Georg Brandl13e89462008-07-01 19:56:00 +00001734 urltype, proxyhost = splittype(proxy)
1735 host, selector = splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001736 url = (host, fullurl) # Signal special case to open_*()
1737 else:
1738 proxy = None
1739 name = 'open_' + urltype
1740 self.type = urltype
1741 name = name.replace('-', '_')
1742 if not hasattr(self, name):
1743 if proxy:
1744 return self.open_unknown_proxy(proxy, fullurl, data)
1745 else:
1746 return self.open_unknown(fullurl, data)
1747 try:
1748 if data is None:
1749 return getattr(self, name)(url)
1750 else:
1751 return getattr(self, name)(url, data)
Senthil Kumaranf5776862012-10-21 13:30:02 -07001752 except (HTTPError, URLError):
Antoine Pitrou6b4883d2011-10-12 02:54:14 +02001753 raise
Andrew Svetlov0832af62012-12-18 23:10:48 +02001754 except OSError as msg:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001755 raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001756
1757 def open_unknown(self, fullurl, data=None):
1758 """Overridable interface to open unknown URL type."""
Georg Brandl13e89462008-07-01 19:56:00 +00001759 type, url = splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001760 raise OSError('url error', 'unknown url type', type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001761
1762 def open_unknown_proxy(self, proxy, fullurl, data=None):
1763 """Overridable interface to open unknown URL type."""
Georg Brandl13e89462008-07-01 19:56:00 +00001764 type, url = splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001765 raise OSError('url error', 'invalid proxy for %s' % type, proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001766
1767 # External interface
1768 def retrieve(self, url, filename=None, reporthook=None, data=None):
1769 """retrieve(url) returns (filename, headers) for a local object
1770 or (tempfilename, headers) for a remote object."""
Georg Brandl13e89462008-07-01 19:56:00 +00001771 url = unwrap(to_bytes(url))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001772 if self.tempcache and url in self.tempcache:
1773 return self.tempcache[url]
Georg Brandl13e89462008-07-01 19:56:00 +00001774 type, url1 = splittype(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001775 if filename is None and (not type or type == 'file'):
1776 try:
1777 fp = self.open_local_file(url1)
1778 hdrs = fp.info()
Philip Jenveycb134d72009-12-03 02:45:01 +00001779 fp.close()
Georg Brandl13e89462008-07-01 19:56:00 +00001780 return url2pathname(splithost(url1)[1]), hdrs
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001781 except OSError as msg:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001782 pass
1783 fp = self.open(url, data)
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001784 try:
1785 headers = fp.info()
1786 if filename:
1787 tfp = open(filename, 'wb')
1788 else:
1789 import tempfile
1790 garbage, path = splittype(url)
1791 garbage, path = splithost(path or "")
1792 path, garbage = splitquery(path or "")
1793 path, garbage = splitattr(path or "")
1794 suffix = os.path.splitext(path)[1]
1795 (fd, filename) = tempfile.mkstemp(suffix)
1796 self.__tempfiles.append(filename)
1797 tfp = os.fdopen(fd, 'wb')
1798 try:
1799 result = filename, headers
1800 if self.tempcache is not None:
1801 self.tempcache[url] = result
1802 bs = 1024*8
1803 size = -1
1804 read = 0
1805 blocknum = 0
Senthil Kumarance260142011-11-01 01:35:17 +08001806 if "content-length" in headers:
1807 size = int(headers["Content-Length"])
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001808 if reporthook:
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001809 reporthook(blocknum, bs, size)
1810 while 1:
1811 block = fp.read(bs)
1812 if not block:
1813 break
1814 read += len(block)
1815 tfp.write(block)
1816 blocknum += 1
1817 if reporthook:
1818 reporthook(blocknum, bs, size)
1819 finally:
1820 tfp.close()
1821 finally:
1822 fp.close()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001823
1824 # raise exception if actual size does not match content-length header
1825 if size >= 0 and read < size:
Georg Brandl13e89462008-07-01 19:56:00 +00001826 raise ContentTooShortError(
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001827 "retrieval incomplete: got only %i out of %i bytes"
1828 % (read, size), result)
1829
1830 return result
1831
1832 # Each method named open_<type> knows how to open that type of URL
1833
    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
            HTTPConnection instance.
        - url is the url to retrieve (a string) or a
            (proxy host, full url) pair when going through a proxy.
        - data is payload for a POST request or None.

        Returns an addinfourl for 2xx responses; any other status is
        handed to self.http_error() and its result is returned.

        Raises OSError('http error', 'no host given') when no host can be
        determined, and URLError when the status line is malformed.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: credentials may be embedded in the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the embedded credentials.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Connect straight to the target instead of the proxy.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        # Credentials are percent-decoded and then base64-encoded for the
        # "Basic" authentication headers built below.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers from self.addheaders are applied last, so they override
        # any of the headers set above that share a name.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
1925
    def open_http(self, url, data=None):
        """Use HTTP protocol.

        Delegates to _open_generic_http() with http.client.HTTPConnection
        as the connection factory."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)
1929
1930 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1931 """Handle http errors.
1932
1933 Derived class can override this, or provide specific handlers
1934 named http_error_DDD where DDD is the 3-digit error code."""
1935 # First check if there's a specific handler for this error
1936 name = 'http_error_%d' % errcode
1937 if hasattr(self, name):
1938 method = getattr(self, name)
1939 if data is None:
1940 result = method(url, fp, errcode, errmsg, headers)
1941 else:
1942 result = method(url, fp, errcode, errmsg, headers, data)
1943 if result: return result
1944 return self.http_error_default(url, fp, errcode, errmsg, headers)
1945
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError.

        The raised HTTPError carries url, errcode, errmsg and headers, with
        None in place of the (already closed) file object."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001950
    # HTTPS support is only defined when the module-level _have_ssl flag is set.
    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to *host* using this opener's
            key_file and cert_file."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Delegates to _open_generic_http() with _https_connection as
            the connection factory."""
            return self._open_generic_http(self._https_connection, url, data)
1960
1961 def open_file(self, url):
1962 """Use local file or FTP depending on form of URL."""
1963 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07001964 raise URLError('file error: proxy support for file protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001965 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Senthil Kumaran383c32d2010-10-14 11:57:35 +00001966 raise ValueError("file:// scheme is supported only on localhost")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001967 else:
1968 return self.open_local_file(url)
1969
1970 def open_local_file(self, url):
1971 """Use local file."""
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001972 import email.utils
1973 import mimetypes
Georg Brandl13e89462008-07-01 19:56:00 +00001974 host, file = splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001975 localname = url2pathname(file)
1976 try:
1977 stats = os.stat(localname)
1978 except OSError as e:
Senthil Kumaranf5776862012-10-21 13:30:02 -07001979 raise URLError(e.strerror, e.filename)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001980 size = stats.st_size
1981 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
1982 mtype = mimetypes.guess_type(url)[0]
1983 headers = email.message_from_string(
1984 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
1985 (mtype or 'text/plain', size, modified))
1986 if not host:
1987 urlfile = file
1988 if file[:1] == '/':
1989 urlfile = 'file://' + file
Georg Brandl13e89462008-07-01 19:56:00 +00001990 return addinfourl(open(localname, 'rb'), headers, urlfile)
1991 host, port = splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001992 if (not port
Senthil Kumaran40d80782012-10-22 09:43:04 -07001993 and socket.gethostbyname(host) in ((localhost(),) + thishost())):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001994 urlfile = file
1995 if file[:1] == '/':
1996 urlfile = 'file://' + file
Senthil Kumaran3800ea92012-01-21 11:52:48 +08001997 elif file[:2] == './':
1998 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
Georg Brandl13e89462008-07-01 19:56:00 +00001999 return addinfourl(open(localname, 'rb'), headers, urlfile)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002000 raise URLError('local file error: not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002001
2002 def open_ftp(self, url):
2003 """Use FTP protocol."""
2004 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002005 raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002006 import mimetypes
Georg Brandl13e89462008-07-01 19:56:00 +00002007 host, path = splithost(url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002008 if not host: raise URLError('ftp error: no host given')
Georg Brandl13e89462008-07-01 19:56:00 +00002009 host, port = splitport(host)
2010 user, host = splituser(host)
2011 if user: user, passwd = splitpasswd(user)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002012 else: passwd = None
Georg Brandl13e89462008-07-01 19:56:00 +00002013 host = unquote(host)
2014 user = unquote(user or '')
2015 passwd = unquote(passwd or '')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002016 host = socket.gethostbyname(host)
2017 if not port:
2018 import ftplib
2019 port = ftplib.FTP_PORT
2020 else:
2021 port = int(port)
Georg Brandl13e89462008-07-01 19:56:00 +00002022 path, attrs = splitattr(path)
2023 path = unquote(path)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002024 dirs = path.split('/')
2025 dirs, file = dirs[:-1], dirs[-1]
2026 if dirs and not dirs[0]: dirs = dirs[1:]
2027 if dirs and not dirs[0]: dirs[0] = '/'
2028 key = user, host, port, '/'.join(dirs)
2029 # XXX thread unsafe!
2030 if len(self.ftpcache) > MAXFTPCACHE:
2031 # Prune the cache, rather arbitrarily
Benjamin Peterson3c2dca62014-06-07 15:08:04 -07002032 for k in list(self.ftpcache):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002033 if k != key:
2034 v = self.ftpcache[k]
2035 del self.ftpcache[k]
2036 v.close()
2037 try:
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002038 if key not in self.ftpcache:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002039 self.ftpcache[key] = \
2040 ftpwrapper(user, passwd, host, port, dirs)
2041 if not file: type = 'D'
2042 else: type = 'I'
2043 for attr in attrs:
Georg Brandl13e89462008-07-01 19:56:00 +00002044 attr, value = splitvalue(attr)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002045 if attr.lower() == 'type' and \
2046 value in ('a', 'A', 'i', 'I', 'd', 'D'):
2047 type = value.upper()
2048 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
2049 mtype = mimetypes.guess_type("ftp:" + url)[0]
2050 headers = ""
2051 if mtype:
2052 headers += "Content-Type: %s\n" % mtype
2053 if retrlen is not None and retrlen >= 0:
2054 headers += "Content-Length: %d\n" % retrlen
2055 headers = email.message_from_string(headers)
Georg Brandl13e89462008-07-01 19:56:00 +00002056 return addinfourl(fp, headers, "ftp:" + url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002057 except ftperrors() as exp:
2058 raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002059
2060 def open_data(self, url, data=None):
2061 """Use "data" URL."""
2062 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002063 raise URLError('data error: proxy support for data protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002064 # ignore POSTed data
2065 #
2066 # syntax of data URLs:
2067 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2068 # mediatype := [ type "/" subtype ] *( ";" parameter )
2069 # data := *urlchar
2070 # parameter := attribute "=" value
2071 try:
2072 [type, data] = url.split(',', 1)
2073 except ValueError:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002074 raise OSError('data error', 'bad data URL')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002075 if not type:
2076 type = 'text/plain;charset=US-ASCII'
2077 semi = type.rfind(';')
2078 if semi >= 0 and '=' not in type[semi:]:
2079 encoding = type[semi+1:]
2080 type = type[:semi]
2081 else:
2082 encoding = ''
2083 msg = []
Senthil Kumaranf6c456d2010-05-01 08:29:18 +00002084 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002085 time.gmtime(time.time())))
2086 msg.append('Content-type: %s' % type)
2087 if encoding == 'base64':
Georg Brandl706824f2009-06-04 09:42:55 +00002088 # XXX is this encoding/decoding ok?
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00002089 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002090 else:
Georg Brandl13e89462008-07-01 19:56:00 +00002091 data = unquote(data)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002092 msg.append('Content-Length: %d' % len(data))
2093 msg.append('')
2094 msg.append(data)
2095 msg = '\n'.join(msg)
Georg Brandl13e89462008-07-01 19:56:00 +00002096 headers = email.message_from_string(msg)
2097 f = io.StringIO(msg)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002098 #f.fileno = None # needed for addinfourl
Georg Brandl13e89462008-07-01 19:56:00 +00002099 return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002100
2101
2102class FancyURLopener(URLopener):
2103 """Derived class with handlers for errors we can handle (perhaps)."""
2104
    def __init__(self, *args, **kwargs):
        """Initialise the base URLopener, then the retry bookkeeping."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        # tries counts redirects currently in flight; http_error_302 gives
        # up once it reaches maxtries.
        self.tries = 0
        self.maxtries = 10
2110
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception.

        Returns the error response as an addinfourl carrying errcode."""
        return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002114
2115 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2116 """Error 302 -- relocated (temporarily)."""
2117 self.tries += 1
2118 if self.maxtries and self.tries >= self.maxtries:
2119 if hasattr(self, "http_error_500"):
2120 meth = self.http_error_500
2121 else:
2122 meth = self.http_error_default
2123 self.tries = 0
2124 return meth(url, fp, 500,
2125 "Internal Server Error: Redirect Recursion", headers)
2126 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
2127 data)
2128 self.tries = 0
2129 return result
2130
2131 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2132 if 'location' in headers:
2133 newurl = headers['location']
2134 elif 'uri' in headers:
2135 newurl = headers['uri']
2136 else:
2137 return
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002138 fp.close()
guido@google.coma119df92011-03-29 11:41:02 -07002139
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002140 # In case the server sent a relative URL, join with original:
Georg Brandl13e89462008-07-01 19:56:00 +00002141 newurl = urljoin(self.type + ":" + url, newurl)
guido@google.coma119df92011-03-29 11:41:02 -07002142
2143 urlparts = urlparse(newurl)
2144
2145 # For security reasons, we don't allow redirection to anything other
2146 # than http, https and ftp.
2147
2148 # We are using newer HTTPError with older redirect_internal method
2149 # This older method will get deprecated in 3.3
2150
Senthil Kumaran6497aa32012-01-04 13:46:59 +08002151 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
guido@google.coma119df92011-03-29 11:41:02 -07002152 raise HTTPError(newurl, errcode,
2153 errmsg +
2154 " Redirection to url '%s' is not allowed." % newurl,
2155 headers, fp)
2156
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002157 return self.open(newurl)
2158
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently).

        Handled exactly like 302 by delegating to http_error_302()."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2162
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302).

        Delegates to http_error_302()."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2166
2167 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2168 """Error 307 -- relocated, but turn POST into error."""
2169 if data is None:
2170 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2171 else:
2172 return self.http_error_default(url, fp, errcode, errmsg, headers)
2173
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002174 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2175 retry=False):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002176 """Error 401 -- authentication required.
2177 This function supports Basic authentication only."""
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002178 if 'www-authenticate' not in headers:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002179 URLopener.http_error_default(self, url, fp,
2180 errcode, errmsg, headers)
2181 stuff = headers['www-authenticate']
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002182 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2183 if not match:
2184 URLopener.http_error_default(self, url, fp,
2185 errcode, errmsg, headers)
2186 scheme, realm = match.groups()
2187 if scheme.lower() != 'basic':
2188 URLopener.http_error_default(self, url, fp,
2189 errcode, errmsg, headers)
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002190 if not retry:
2191 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2192 headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002193 name = 'retry_' + self.type + '_basic_auth'
2194 if data is None:
2195 return getattr(self,name)(url, realm)
2196 else:
2197 return getattr(self,name)(url, realm, data)
2198
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                   retry=False):
    """Error 407 -- proxy authentication required.

    This function supports Basic authentication only.  The scheme and
    realm are parsed out of the 'proxy-authenticate' header and the
    request is handed to this opener's 'retry_proxy_<type>_basic_auth'
    method, whose result is returned.  *data* is forwarded to that method
    only when it is not None.
    """
    # NOTE(review): the statements following each http_error_default()
    # call only make sense if that call raises -- confirm against
    # URLopener.http_error_default.
    if 'proxy-authenticate' not in headers:
        URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
    challenge = headers['proxy-authenticate']
    parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not parsed:
        URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
    scheme, realm = parsed.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
    if not retry:
        URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
    # Dispatch on the URL type (e.g. retry_proxy_http_basic_auth).
    retry_args = (url, realm) if data is None else (url, realm, data)
    return getattr(self, 'retry_proxy_' + self.type + '_basic_auth')(*retry_args)
2223
def retry_proxy_http_basic_auth(self, url, realm, data=None):
    """Re-open *url* over http with credentials added to the 'http' proxy.

    Any user info already present in the configured proxy URL is dropped,
    a user/password pair is obtained from get_user_passwd() and written
    (percent-quoted) back into self.proxies['http'].  Returns None when no
    credentials are available, otherwise the result of self.open().
    """
    host, selector = splithost(url)
    newurl = 'http://' + host + selector
    _scheme, proxy_netloc = splittype(self.proxies['http'])
    proxy_netloc, proxy_path = splithost(proxy_netloc)
    # Index just past '@' (0 when the proxy URL has no user info); also
    # passed to get_user_passwd() as its clear_cache flag.
    at = proxy_netloc.find('@') + 1
    proxy_netloc = proxy_netloc[at:]
    user, passwd = self.get_user_passwd(proxy_netloc, realm, at)
    if not (user or passwd):
        return None
    userinfo = quote(user, safe='') + ':' + quote(passwd, safe='')
    self.proxies['http'] = ('http://' + userinfo + '@' + proxy_netloc
                            + proxy_path)
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2241
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    """Re-open *url* over https with credentials added to the 'https' proxy.

    Any user info already present in the configured proxy URL is dropped,
    a user/password pair is obtained from get_user_passwd() and written
    (percent-quoted) back into self.proxies['https'].  Returns None when
    no credentials are available, otherwise the result of self.open().
    """
    host, selector = splithost(url)
    newurl = 'https://' + host + selector
    _scheme, proxy_netloc = splittype(self.proxies['https'])
    proxy_netloc, proxy_path = splithost(proxy_netloc)
    # Index just past '@' (0 when the proxy URL has no user info); also
    # passed to get_user_passwd() as its clear_cache flag.
    at = proxy_netloc.find('@') + 1
    proxy_netloc = proxy_netloc[at:]
    user, passwd = self.get_user_passwd(proxy_netloc, realm, at)
    if not (user or passwd):
        return None
    userinfo = quote(user, safe='') + ':' + quote(passwd, safe='')
    self.proxies['https'] = ('https://' + userinfo + '@' + proxy_netloc
                             + proxy_path)
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2259
def retry_http_basic_auth(self, url, realm, data=None):
    """Re-open *url* over http with Basic credentials embedded in the URL.

    User info already present in the host part is dropped, a
    user/password pair is obtained from get_user_passwd() and inserted
    (percent-quoted) in front of the host.  Returns None when no
    credentials are available, otherwise the result of self.open().
    """
    host, selector = splithost(url)
    # Index just past '@' (0 when the URL has no user info); also passed
    # to get_user_passwd() as its clear_cache flag.
    at = host.find('@') + 1
    host = host[at:]
    user, passwd = self.get_user_passwd(host, realm, at)
    if not (user or passwd):
        return None
    userinfo = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = 'http://' + userinfo + '@' + host + selector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2273
def retry_https_basic_auth(self, url, realm, data=None):
    """Re-open *url* over https with Basic credentials embedded in the URL.

    User info already present in the host part is dropped, a
    user/password pair is obtained from get_user_passwd() and inserted
    (percent-quoted) in front of the host.  Returns None when no
    credentials are available, otherwise the result of self.open().
    """
    host, selector = splithost(url)
    # Index just past '@' (0 when the URL has no user info); also passed
    # to get_user_passwd() as its clear_cache flag.
    at = host.find('@') + 1
    host = host[at:]
    user, passwd = self.get_user_passwd(host, realm, at)
    if not (user or passwd):
        return None
    userinfo = quote(user, safe='') + ':' + quote(passwd, safe='')
    newurl = 'https://' + userinfo + '@' + host + selector
    if data is None:
        return self.open(newurl)
    return self.open(newurl, data)
2287
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return a (user, passwd) pair for *realm* at *host*.

    Results are cached in self.auth_cache under 'realm@host' (host
    lower-cased).  A cached pair is returned unless *clear_cache* is
    true, in which case the cached entry is discarded and
    prompt_user_passwd() is consulted again.  Only a pair with a truthy
    user or password is stored.
    """
    key = realm + '@' + host.lower()
    if key in self.auth_cache:
        if not clear_cache:
            return self.auth_cache[key]
        del self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[key] = (user, passwd)
    return user, passwd
2298
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!

    Ask for a user name with input() and a password with
    getpass.getpass().  Returns (user, passwd), or (None, None) if the
    prompt is interrupted with KeyboardInterrupt.
    """
    import getpass
    try:
        user = input("Enter username for %s at %s: " % (realm, host))
        passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                 (user, realm, host))
    except KeyboardInterrupt:
        # Emit a newline so following output does not share the prompt line.
        print()
        return None, None
    else:
        return user, passwd
2310
2311
2312# Utility functions
2313
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; the result is kept in the module-level
    _localhost and reused by later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2321
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The addresses come from socket.gethostbyname_ex() for the local host
    name, falling back to 'localhost' if that lookup raises
    socket.gaierror.  The resulting tuple is kept in the module-level
    _thishost and reused by later calls.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2332
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value of ftplib.all_errors
    is kept in the module-level _ftperrors and reused by later calls.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2341
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The message is created once by parsing an empty string; the same
    object is kept in the module-level _noheaders and returned by every
    later call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2349
2350
2351# Utility classes
2352
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and log in via init().

        If init() fails the wrapper is closed before the exception is
        re-raised.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet closed.
        self.refcount = 0
        # While true, closing the last file does not close the connection.
        self.keepalive = persistent
        try:
            self.init()
        except:
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change to the directory given by dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        self.ftp.cwd('/'.join(self.dirs))

    def retrfile(self, file, type):
        """Start a transfer and return (file object, length).

        *type* 'd' or 'D' requests a directory listing; any other value is
        sent as the FTP 'TYPE' argument and *file* is retrieved with RETR.
        When no file is named, or RETR fails with a 550 reply, a LIST is
        issued instead.  Other permission errors are raised as URLError.
        The returned file object calls file_close() when it is closed.
        """
        import ftplib
        self.endtransfer()
        isdir = type in ('d', 'D')
        cmd = 'TYPE A' if isdir else 'TYPE ' + type
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Re-establish the connection once and try again.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                conn, retrlen = self.ftp.ntransfercmd('RETR ' + file)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for files returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring errors listed by ftperrors()."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2446
2447# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but an all-lowercase
    '<scheme>_proxy' variable takes precedence over any other spelling.
    When REQUEST_METHOD is set, the value found for 'http' under a
    non-lowercase name (e.g. HTTP_PROXY) is ignored.
    """
    proxies = {}
    # First pass: accept any spelling of <scheme>_proxy.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110: in a CGI script a client-supplied "Proxy:" header
    # shows up as HTTP_PROXY, so that value must not be trusted.  A
    # lowercase http_proxy is still honoured by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase names override, and an empty lowercase value
    # explicitly unsets the scheme.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2463
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY if
    that is unset or empty), which should be a list of DNS suffixes
    separated by commas, or '*' for all hosts.

    A suffix matches the host itself and any sub-domain of it; it does
    not match an unrelated name that merely ends with the same characters
    ('example.com' matches 'www.example.com' but not 'evilexample.com').
    Leading dots on a suffix are ignored and the comparison is
    case-insensitive.  *host* is tested both with and without its port.

    Returns 1 if the proxy should be bypassed, otherwise 0.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, _port = splitport(host)
    candidates = (hostonly.lower(), host.lower())
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        suffix = name.strip().lstrip('.').lower()
        if not suffix:
            continue
        # Require a label boundary: either the whole name or '.suffix'.
        dotted = '.' + suffix
        for candidate in candidates:
            if candidate == suffix or candidate.endswith(dotted):
                return 1
    # otherwise, don't bypass
    return 0
2483
2484
# This code tests an OSX specific data structure but is testable on all
# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }

    Exceptions that start with digits are treated as IPv4 networks and
    compared against the resolved address of *host*; an entry whose
    prefix length is larger than 32 bits is ignored.  All other
    exceptions are shell-style patterns matched against *host*.
    """
    from fnmatch import fnmatch

    hostonly, _port = splitport(host)

    def ip2num(ipAddr):
        # Dotted decimal -> integer; short forms such as '10.1' are
        # padded with zero octets on the right.
        parts = list(map(int, ipAddr.split('.')))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, at most once, when the first IP exception is seen.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            if hostIP is None:
                try:
                    hostIP = ip2num(socket.gethostbyname(hostonly))
                except OSError:
                    continue

            base = ip2num(m.group(1))
            prefix = m.group(2)
            if prefix is None:
                # No explicit prefix: one octet of mask per given component.
                bits = 8 * (m.group(1).count('.') + 1)
            else:
                bits = int(prefix[1:])

            if bits > 32:
                # An over-long prefix would produce a negative shift count
                # (ValueError); ignore the invalid entry instead.
                continue

            shift = 32 - bits
            if (hostIP >> shift) == (base >> shift):
                return True

        elif fnmatch(host, value):
            return True

    return False
2545
2546
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002547if sys.platform == 'darwin':
Ronald Oussoren84151202010-04-18 20:46:11 +00002548 from _scproxy import _get_proxy_settings, _get_proxies
2549
def proxy_bypass_macosx_sysconf(host):
    """Apply _proxy_bypass_macosx_sysconf() to *host* using the settings
    returned by _get_proxy_settings()."""
    return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())
Ronald Oussoren84151202010-04-18 20:46:11 +00002553
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.
    """
    system_proxies = _get_proxies()
    return system_proxies
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002561
Ronald Oussoren84151202010-04-18 20:46:11 +00002562
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002563
def proxy_bypass(host):
    """Return the bypass decision for *host*.

    proxy_bypass_environment() is used when getproxies_environment()
    returns a non-empty mapping, proxy_bypass_macosx_sysconf() otherwise.
    """
    if getproxies_environment():
        bypass = proxy_bypass_environment
    else:
        bypass = proxy_bypass_macosx_sysconf
    return bypass(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002569
def getproxies():
    """Return the proxies from getproxies_environment(), or those from
    getproxies_macosx_sysconf() when the environment yields none."""
    proxies = getproxies_environment()
    if not proxies:
        proxies = getproxies_macosx_sysconf()
    return proxies
2572
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002573
2574elif os.name == 'nt':
def _parse_registry_proxy_server(proxyServer):
    """Translate a 'ProxyServer' registry string into a scheme -> URL dict."""
    proxies = {}
    if '=' in proxyServer:
        # Per-protocol settings
        for p in proxyServer.split(';'):
            if '=' not in p:
                # Blank or malformed entry (e.g. after a trailing ';'):
                # skip it rather than abandoning the remaining entries.
                continue
            protocol, address = p.split('=', 1)
            # See if address has a type:// prefix
            if not re.match('^([^/:]+)://', address):
                address = '%s://%s' % (protocol, address)
            proxies[protocol] = address
    elif proxyServer[:5] == 'http:':
        # A single, fully qualified http proxy URL.
        proxies['http'] = proxyServer
    else:
        # Use one setting for all protocols
        proxies['http'] = 'http://%s' % proxyServer
        proxies['https'] = 'https://%s' % proxyServer
        proxies['ftp'] = 'ftp://%s' % proxyServer
    return proxies


def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    The 'ProxyEnable' and 'ProxyServer' values are read from the current
    user's 'Internet Settings' key.  An empty dictionary is returned when
    winreg cannot be imported, the proxy is disabled, or the registry
    cannot be read.
    """
    proxies = {}
    try:
        import winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        # The 'with' block closes the key handle even when a query raises.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                proxies = _parse_registry_proxy_server(proxyServer)
    except (OSError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
2619
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified,
    or the registry.

    """
    proxies = getproxies_environment()
    if not proxies:
        proxies = getproxies_registry()
    return proxies
2628
def _registry_override_matches(hosts, proxy_override):
    """Return 1 if any name in *hosts* matches a 'ProxyOverride' entry.

    *hosts* is a non-empty list whose first item is the host name as
    given; *proxy_override* is the ';'-separated registry value.  Entries
    are glob patterns ('*' and '?') compared case-insensitively against
    the whole name.  The special entry '<local>' matches when the first
    host name contains no dot.  Blank entries are ignored.
    """
    from fnmatch import fnmatchcase

    candidates = [name.lower() for name in hosts]
    for entry in proxy_override.split(';'):
        entry = entry.strip()
        if not entry:
            # e.g. a trailing ';' -- must not match every host.
            continue
        if entry == '<local>':
            if '.' not in hosts[0]:
                return 1
            continue
        pattern = entry.lower()
        for candidate in candidates:
            if fnmatchcase(candidate, pattern):
                return 1
    return 0


def proxy_bypass_registry(host):
    """Test whether the registry says *host* should bypass the proxy.

    Reads 'ProxyEnable' and 'ProxyOverride' from the current user's
    'Internet Settings' key and matches the host name, its resolved
    address and its fully qualified name against the override list.
    Returns 1 to bypass and 0 otherwise, including when winreg cannot be
    imported, the registry cannot be read, or the proxy is disabled.
    """
    try:
        import winreg
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        # The 'with' block closes the key handle even when a query raises.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
                ) as internetSettings:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            # Returned as Unicode but problems if not converted to ASCII
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
    except OSError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, _port = splitport(host)
    hosts = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            hosts.append(addr)
    except OSError:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            hosts.append(fqdn)
    except OSError:
        pass
    # now check if we match one of the registry values.
    return _registry_override_matches(hosts, proxyOverride)
2678
def proxy_bypass(host):
    """Return the bypass decision for *host*.

    proxy_bypass_environment() is used when getproxies_environment()
    returns a non-empty mapping, proxy_bypass_registry() otherwise.

    """
    if getproxies_environment():
        bypass = proxy_bypass_environment
    else:
        bypass = proxy_bypass_registry
    return bypass(host)
2690
2691else:
2692 # By default use environment variables
2693 getproxies = getproxies_environment
2694 proxy_bypass = proxy_bypass_environment