blob: a6d350a97a45299c3d4f563c019df490538d98d0 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- Basic usage is the same as the original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
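BaseHandler -- Base class for handlers.  It records the parent
OpenerDirector and defines the ordering of handlers (handler_order).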
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algo that
# was requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
Martin Pantere6f06092016-05-16 01:14:20 +000094import string
Jeremy Hylton1afc1692008-06-18 20:49:58 +000095import sys
96import time
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
Rémi Lapeyre674ee122019-05-27 15:43:45 +0200104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
Cheryl Sabella0250de42018-04-25 16:51:54 -0700105 _splittype, _splithost, _splitport, _splituser, _splitpasswd,
106 _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# Probe for the optional ssl module; _have_ssl records whether the import
# succeeded so that HTTPS-specific code can check for it before using ssl.
try:
    import ssl
    _have_ssl = True
except ImportError:
    _have_ssl = False

__all__ = [
    # Classes
    'Request',
    'OpenerDirector',
    'BaseHandler',
    'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler',
    'HTTPCookieProcessor',
    'ProxyHandler',
    'HTTPPasswordMgr',
    'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth',
    'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler',
    'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler',
    'ProxyDigestAuthHandler',
    'HTTPHandler',
    'FileHandler',
    'FTPHandler',
    'CacheFTPHandler',
    'DataHandler',
    'UnknownHandler',
    'HTTPErrorProcessor',
    # Functions
    'urlopen',
    'install_opener',
    'build_opener',
    'pathname2url',
    'url2pathname',
    'getproxies',
    # Legacy interface
    'urlretrieve',
    'urlcleanup',
    'URLopener',
    'FancyURLopener',
]

# "major.minor" of the running interpreter; used in the User-Agent header sent
__version__ = '{}.{}'.format(*sys.version_info[:2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000137
# Module-wide default opener.  It is created lazily by urlopen() and can be
# replaced with install_opener().
_opener = None


def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open *url*, which is either a URL string or a Request object.

    *data* is an object holding additional data to send to the server, or
    None when there is nothing to send.  See Request for details.

    The urllib.request module speaks HTTP/1.1 and adds a "Connection:close"
    header to its HTTP requests.

    *timeout* is an optional timeout, in seconds, for blocking operations
    such as the connection attempt; when omitted the global default timeout
    applies.  It is only honoured for HTTP, HTTPS and FTP connections.

    *context*, when given, must be an ssl.SSLContext instance describing the
    SSL options to use.  See HTTPSConnection for more details.

    *cafile* and *capath* (deprecated) name a set of trusted CA certificates
    for HTTPS requests: cafile is a single file holding a bundle of CA
    certificates, capath a directory of hashed certificate files.  See
    ssl.SSLContext.load_verify_locations() for more information.  They
    cannot be combined with *context*; doing so raises ValueError, as does
    using them when SSL support is unavailable.

    The value of *cadefault* is not used to configure anything; however, a
    true value still takes the deprecated code path described above.

    The returned object always works as a context manager and has the
    properties url, headers, and status; see urllib.response.addinfourl for
    details on these properties.

    For HTTP and HTTPS URLs the result is a slightly modified
    http.client.HTTPResponse object: besides the properties above, its msg
    attribute carries the same information as the reason attribute --- the
    reason phrase returned by the server --- rather than the response
    headers described in the HTTPResponse documentation.

    For FTP, file, and data URLs, and for requests explicitly handled by the
    legacy URLopener and FancyURLopener classes, the result is a
    urllib.response.addinfourl object.

    None may be returned when no handler handles the request (the default
    installed global OpenerDirector uses UnknownHandler to make sure this
    never happens).

    If proxy settings are detected (for example a *_proxy environment
    variable such as http_proxy is set), ProxyHandler is installed by
    default and routes the requests through the proxy.
    '''
    global _opener
    legacy_ca_args = cafile or capath or cadefault
    if legacy_ca_args:
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)

    if legacy_ca_args or context:
        # A one-off opener carrying the requested SSL context; the cached
        # default opener is neither used nor modified.
        opener = build_opener(HTTPSHandler(context=context))
    else:
        if _opener is None:
            _opener = build_opener()
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000215
def install_opener(opener):
    """Install *opener* as the module-wide default opener.

    The object is stored unchanged in the module-level ``_opener`` variable;
    no validation is performed here.
    """
    global _opener
    _opener = opener
219
# Paths of the temporary files created by urlretrieve(); urlcleanup()
# deletes them.
_url_tempfiles = []


def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* to a file on disk.

    When *filename* is given the data is written there; otherwise a named
    temporary file is created and remembered so that urlcleanup() can
    delete it later.  *reporthook*, if given, is called once before the
    first read and once after every block with the block number, the block
    size used for reading, and the total size announced by the
    Content-Length header (-1 when that header is absent).  *data* is passed
    on to urlopen().

    For a file: URL without *filename* nothing is copied: the normalized
    local path is returned directly.

    Returns a (path, headers) tuple.  Raises ContentTooShortError, carrying
    that same tuple, when fewer bytes than announced were received.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # A local file needs no copy unless a destination was requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: the caller's file or a tracked temp file.
        if not filename:
            out = tempfile.NamedTemporaryFile(delete=False)
            filename = out.name
            _url_tempfiles.append(filename)
        else:
            out = open(filename, 'wb')

        result = (filename, headers)
        block_size = 1024 * 8
        with out:
            expected = -1
            received = 0
            blocknum = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])

            if reporthook:
                reporthook(blocknum, block_size, expected)

            while True:
                chunk = fp.read(block_size)
                if not chunk:
                    break
                received += len(chunk)
                out.write(chunk)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, block_size, expected)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000283
def urlcleanup():
    """Clean up temporary files from urlretrieve calls.

    Every path recorded in ``_url_tempfiles`` is unlinked (failures to
    delete are ignored), the list is emptied, and the cached default
    opener is dropped.
    """
    global _opener
    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except OSError:
            # Best effort: the file may already be gone.
            pass
    _url_tempfiles.clear()

    if _opener:
        _opener = None
296
# copied from cookielib.py
# Matches a trailing ":<digits>" port suffix.
_cut_port_re = re.compile(r":\d+$", re.ASCII)


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of the request's full
    URL; when that part is empty, the request's "Host" header is used
    instead (defaulting to "").  A trailing port is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    netloc = urlparse(request.full_url)[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    without_port = _cut_port_re.sub("", netloc, 1)
    return without_port.lower()
314
class Request:
    """State of a single URL request: URL, payload, headers and method.

    Assigning ``full_url`` splits the URL into ``type`` (scheme), ``host``,
    ``selector`` and ``fragment``.  Header names are normalized with
    str.capitalize() when added; lookups (has_header, get_header,
    remove_header) use the name exactly as given.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the shared default ``headers`` dict is only read, never
        # mutated, in this constructor.
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        # _data must exist before the data property setter compares to it,
        # and both header dicts must exist before it calls has_header().
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        self.origin_req_host = (request_host(self)
                                if origin_req_host is None
                                else origin_req_host)
        self.unverifiable = unverifiable
        # The attribute is left undefined when no method is given, so that
        # get_method() falls back to GET/POST.
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL, with the fragment re-attached when there is one."""
        if not self.fragment:
            return self._full_url
        return f'{self._full_url}#{self.fragment}'

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        unwrapped = unwrap(url)
        self._full_url, self.fragment = _splittag(unwrapped)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request payload, or None."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # A Content-length header most probably describes the previous
            # payload, so drop it whenever the payload changes.
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split ``_full_url`` into type, host and selector.

        Raises ValueError when the URL has no scheme.
        """
        scheme, remainder = _splittype(self._full_url)
        self.type = scheme
        if scheme is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        host, self.selector = _splithost(remainder)
        self.host = unquote(host) if host else host

    def get_method(self):
        """Return a string indicating the HTTP request method.

        An explicitly set ``method`` attribute wins; otherwise the result
        is "POST" when the request has data and "GET" when it has none.
        """
        try:
            return self.method
        except AttributeError:
            return "GET" if self.data is None else "POST"

    def get_full_url(self):
        """Return ``full_url``."""
        return self.full_url

    def set_proxy(self, host, type):
        """Redirect the request to the proxy at *host*.

        The first time this is called on an https request, the original
        host is remembered in ``_tunnel_host`` and type and selector are
        left alone.  In every other case the type becomes *type* and the
        selector becomes the full URL.
        """
        tunnel_needed = self.type == 'https' and not self._tunnel_host
        if tunnel_needed:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Set a header that is also kept on redirected requests."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set in either header dict."""
        return any(header_name in table
                   for table in (self.headers, self.unredirected_hdrs))

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers take precedence."""
        fallback = self.unredirected_hdrs.get(header_name, default)
        return self.headers.get(header_name, fallback)

    def remove_header(self, header_name):
        """Remove *header_name* from both header dicts, if present."""
        for table in (self.headers, self.unredirected_hdrs):
            table.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        merged = dict(self.unredirected_hdrs)
        merged.update(self.headers)
        return list(merged.items())
423
class OpenerDirector:
    """Dispatch requests to a collection of registered handlers.

    Handlers are discovered by method name: ``<protocol>_open``,
    ``<protocol>_request``, ``<protocol>_response`` and
    ``<protocol>_error_<kind>`` methods are recorded in the matching lookup
    table, and each per-key handler list is kept sorted using the handlers'
    own ordering (``<``).
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every protocol method it provides.

        Raises TypeError if *handler* has no ``add_parent`` attribute.  A
        handler that provides no recognised method is silently ignored.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            protocol, sep, condition = meth.partition("_")
            if not sep:
                # A name without an underscore cannot follow the
                # <protocol>_<condition> convention.  Without this guard a
                # method called "open" would be registered for the bogus
                # protocol "ope".
                continue

            if condition.startswith("error"):
                # The kind is whatever follows the first underscore of the
                # condition ("error_404" -> 404); if there is none, the
                # whole condition is used.
                _, has_kind, kind = condition.partition("_")
                if not has_kind:
                    kind = condition
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in ``chain[kind]`` in order.

        Returns the first result that is not None, or None if every handler
        declined.
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        for handler in chain.get(kind, ()):
            result = getattr(handler, meth_name)(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request passes through the ``<protocol>_request`` processors,
        is opened by the open handlers, and the response then passes through
        the ``<protocol>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol + "_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, meth_name)(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers,
                  req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol + "_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, meth_name)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains.

        Returns the first truthy result of the first two chains, otherwise
        whatever the 'unknown' chain returns.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol,
                                  protocol + '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https' the third positional argument is taken as
        the status code: the ``http_error_<code>`` chain is tried first and
        then ``http_error_default``.  Other protocols use the
        ``<proto>_error`` chain only.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            chain = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
            orig_args = args
        else:
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = False

        result = self._call_chain(chain, proto, meth_name, *args)
        if result:
            return result

        if http_err:
            return self._call_chain(chain, 'default', 'http_error_default',
                                    *orig_args)
561 return self._call_chain(*args)
562
563# XXX probably also want an abstract factory that knows when it makes
564# sense to skip a superclass in favor of a subclass and when it might
565# make sense to include both
566
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    *handlers* may be handler instances or handler classes; classes are
    instantiated with no arguments.

    The opener starts from a set of default handlers, including support for
    HTTP, FTP and, when http.client provides HTTPSConnection, HTTPS.  A
    default handler class is left out whenever one of the arguments is an
    instance or a subclass of it.  Remaining defaults are added first,
    followed by the supplied handlers in the order given.
    """
    def replaces(candidate, default_class):
        # A class replaces a default by subclassing it, an instance by
        # being an instance of it.
        if isinstance(candidate, type):
            return issubclass(candidate, default_class)
        return isinstance(candidate, default_class)

    defaults = [ProxyHandler, UnknownHandler, HTTPHandler,
                HTTPDefaultErrorHandler, HTTPRedirectHandler,
                FTPHandler, FileHandler, HTTPErrorProcessor,
                DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        defaults.append(HTTPSHandler)

    opener = OpenerDirector()
    for default_class in defaults:
        if any(replaces(candidate, default_class) for candidate in handlers):
            continue
        opener.add_handler(default_class())

    for candidate in handlers:
        instance = candidate() if isinstance(candidate, type) else candidate
        opener.add_handler(instance)
    return opener
602
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector.

    ``handler_order`` is the sort key used when handlers are compared with
    ``<``: a handler with a lower value sorts first.
    """

    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
620
621
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses.

    Responses whose status code is outside the 2xx range are handed to the
    parent's error() machinery; its result replaces the response.
    """
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        status = response.code
        reason = response.msg
        headers = response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= status < 300:
            return response

        return self.parent.error(
            'http', request, response, status, reason, headers)

    https_response = http_response
638
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turns the error response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError built from the request URL and response."""
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000642
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303, 307 and 308 redirects.

    308 (Permanent Redirect) is treated like 307: it is followed for GET
    and HEAD requests only, never automatically for POST.
    """

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.

        The new Request carries the original headers except
        Content-Length and Content-Type, carries no data, and is marked
        unverifiable.
        """
        m = req.get_method()
        redirectable = (
            (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD"))
            or (code in (301, 302, 303) and m == "POST"))
        if not redirectable:
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError for a target scheme
        other than http, https or ftp, and when a redirect loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.  (An empty scheme means a relative URL.)
        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All redirect codes share one implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_302
    http_error_308 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
754
755
756def _parse_proxy(proxy):
757 """Return (scheme, user, password, host/port) given a URL or an authority.
758
759 If a URL is supplied, it must have an authority (host:port) component.
760 According to RFC 3986, having an authority component means the URL must
Senthil Kumarand8e24f12014-04-14 16:32:20 -0400761 have two slashes after the scheme.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000762 """
Cheryl Sabella0250de42018-04-25 16:51:54 -0700763 scheme, r_scheme = _splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000764 if not r_scheme.startswith("/"):
765 # authority
766 scheme = None
767 authority = proxy
768 else:
769 # URL
770 if not r_scheme.startswith("//"):
771 raise ValueError("proxy URL with no authority: %r" % proxy)
772 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
773 # and 3.3.), path is empty or starts with '/'
774 end = r_scheme.find("/", 2)
775 if end == -1:
776 end = None
777 authority = r_scheme[2:end]
Cheryl Sabella0250de42018-04-25 16:51:54 -0700778 userinfo, hostport = _splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000779 if userinfo is not None:
Cheryl Sabella0250de42018-04-25 16:51:54 -0700780 user, password = _splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000781 else:
782 user = password = None
783 return scheme, user, password, hostport
784
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given per URL scheme.

    For every ``scheme -> proxy URL`` entry of *proxies* (default: the
    result of ``getproxies()``) a ``<scheme>_open`` attribute is installed
    that forwards to ``proxy_open``.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            scheme = scheme.lower()
            # Defaults freeze the per-iteration values inside the lambda.
            opener = (lambda r, proxy=proxy_url, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; return None or a response from a re-open."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user), unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type != proxy_type and orig_type != 'https':
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
        # let other handlers take care of it
        return None
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000827
828class HTTPPasswordMgr:
829
830 def __init__(self):
831 self.passwd = {}
832
833 def add_password(self, realm, uri, user, passwd):
834 # uri could be a single URI or a sequence
835 if isinstance(uri, str):
836 uri = [uri]
Senthil Kumaran34d38dc2011-10-20 02:48:01 +0800837 if realm not in self.passwd:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000838 self.passwd[realm] = {}
839 for default_port in True, False:
840 reduced_uri = tuple(
Jon Dufresne39726282017-05-18 07:35:54 -0700841 self.reduce_uri(u, default_port) for u in uri)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000842 self.passwd[realm][reduced_uri] = (user, passwd)
843
844 def find_user_password(self, realm, authuri):
845 domains = self.passwd.get(realm, {})
846 for default_port in True, False:
847 reduced_authuri = self.reduce_uri(authuri, default_port)
848 for uris, authinfo in domains.items():
849 for uri in uris:
850 if self.is_suburi(uri, reduced_authuri):
851 return authinfo
852 return None, None
853
854 def reduce_uri(self, uri, default_port=True):
855 """Accept authority or URI and extract only the authority and path."""
856 # note HTTP URLs do not have a userinfo component
Georg Brandl13e89462008-07-01 19:56:00 +0000857 parts = urlsplit(uri)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000858 if parts[1]:
859 # URI
860 scheme = parts[0]
861 authority = parts[1]
862 path = parts[2] or '/'
863 else:
864 # host or host:port
865 scheme = None
866 authority = uri
867 path = '/'
Cheryl Sabella0250de42018-04-25 16:51:54 -0700868 host, port = _splitport(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000869 if default_port and port is None and scheme is not None:
870 dport = {"http": 80,
871 "https": 443,
872 }.get(scheme)
873 if dport is not None:
874 authority = "%s:%d" % (host, dport)
875 return authority, path
876
877 def is_suburi(self, base, test):
878 """Check if test is below base in a URI tree
879
880 Both args must be URIs in reduced form.
881 """
882 if base == test:
883 return True
884 if base[0] != test[0]:
885 return False
886 common = posixpath.commonprefix((base[1], test[1]))
887 if len(common) == len(base[1]):
888 return True
889 return False
890
891
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` (default) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; if no user is found, retry with realm None."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
900
901
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also tracks which URIs are already authenticated.

    ``self.authenticated`` maps reduced URIs (see ``reduce_uri``) to the
    ``is_authenticated`` flag last recorded for them.
    """

    def __init__(self, *args, **kwargs):
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Store credentials and record the authentication flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Record *is_authenticated* for one URI or each URI of a sequence."""
        # uri could be a single URI or a sequence
        uris = [uri] if isinstance(uri, str) else uri

        for default_port in (True, False):
            for candidate in uris:
                key = self.reduce_uri(candidate, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag of the first recorded URI that covers *authuri*.

        Returns None when no recorded URI matches.
        """
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for known, state in self.authenticated.items():
                if self.is_suburi(known, target):
                    return state
        return None
931
932
class AbstractBasicAuthHandler:
    """Shared logic for HTTP Basic authentication handlers.

    Subclasses provide ``auth_header`` (the request header that carries
    the credentials) and ``parent`` (the opener used to retry requests).
    """

    # Matches one "<scheme> realm=<value>" challenge.  The pattern is
    # anchored at the start of the header or at a comma so that matching
    # stays linear in the header length; an unanchored leading
    # "(?:.*,)*" backtracks catastrophically on comma-heavy input.
    #
    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'      # start of the string or ','
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme like "Basic"
                    '[ \t]+'       # mandatory whitespace
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials if the challenge allows it.

        *authreq* is the name of the challenge header to read from
        *headers*.  Returns the retried response, or None when there is
        no usable Basic challenge or no stored password.  Raises
        ValueError when the header's first scheme is not Basic.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if not authreq:
            return None

        words = authreq.split()
        if not words:
            # whitespace-only header value: nothing to parse
            return None
        scheme = words[0]
        if scheme.lower() != 'basic':
            raise ValueError("AbstractBasicAuthHandler does not"
                             " support the following scheme: '%s'" %
                             scheme)

        # Use the first Basic challenge that carries a realm; challenges
        # for other schemes in the same header are skipped.
        for mo in AbstractBasicAuthHandler.rx.finditer(authreq):
            scheme, quote, realm = mo.groups()
            if scheme.lower() != 'basic':
                continue
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 2)
            return self.retry_http_basic_auth(host, req, realm)
        return None

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open *req* with an auth header, unless it was already sent."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                # Same credentials were already rejected; do not loop.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Pre-emptively add credentials for URIs marked as authenticated."""
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether the response code (2xx or not) shows the URL as authenticated."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1010
1011
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001012
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using HTTP Basic authentication."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by delegating to the shared Basic-auth retry logic."""
        # The full request URL is what gets looked up in the password manager.
        return self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001022
1023
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using HTTP Basic authentication."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by delegating to the shared Basic-auth retry logic."""
        # http_error_auth_reqed requires that there is no userinfo component
        # in authority.  Assume there isn't one, since urllib.request does
        # not (and should not, RFC 3986 s. 3.2.1) support requests for URLs
        # containing userinfo.
        return self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001037
1038
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001039# Return n random bytes.
1040_randombytes = os.urandom
1041
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001042
1043class AbstractDigestAuthHandler:
1044 # Digest authentication is specified in RFC 2617.
1045
1046 # XXX The client does not inspect the Authentication-Info header
1047 # in a successful response.
1048
1049 # XXX It should be possible to test this implementation against
1050 # a mock server that just generates a static set of challenges.
1051
1052 # XXX qop="auth-int" supports is shaky
1053
1054 def __init__(self, passwd=None):
1055 if passwd is None:
1056 passwd = HTTPPasswordMgr()
1057 self.passwd = passwd
1058 self.add_password = self.passwd.add_password
1059 self.retried = 0
1060 self.nonce_count = 0
Senthil Kumaran4c7eaee2009-11-15 08:43:45 +00001061 self.last_nonce = None
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001062
1063 def reset_retry_count(self):
1064 self.retried = 0
1065
1066 def http_error_auth_reqed(self, auth_header, host, req, headers):
1067 authreq = headers.get(auth_header, None)
1068 if self.retried > 5:
1069 # Don't fail endlessly - if we failed once, we'll probably
1070 # fail a second time. Hm. Unless the Password Manager is
1071 # prompting for the information. Crap. This isn't great
1072 # but it's better than the current 'repeat until recursion
1073 # depth exceeded' approach <wink>
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001074 raise HTTPError(req.full_url, 401, "digest auth failed",
Georg Brandl13e89462008-07-01 19:56:00 +00001075 headers, None)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001076 else:
1077 self.retried += 1
1078 if authreq:
1079 scheme = authreq.split()[0]
1080 if scheme.lower() == 'digest':
1081 return self.retry_http_digest_auth(req, authreq)
Senthil Kumaran1a129c82011-10-20 02:50:13 +08001082 elif scheme.lower() != 'basic':
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001083 raise ValueError("AbstractDigestAuthHandler does not support"
1084 " the following scheme: '%s'" % scheme)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001085
1086 def retry_http_digest_auth(self, req, auth):
1087 token, challenge = auth.split(' ', 1)
1088 chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
1089 auth = self.get_authorization(req, chal)
1090 if auth:
1091 auth_val = 'Digest %s' % auth
1092 if req.headers.get(self.auth_header, None) == auth_val:
1093 return None
1094 req.add_unredirected_header(self.auth_header, auth_val)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001095 resp = self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001096 return resp
1097
1098 def get_cnonce(self, nonce):
1099 # The cnonce-value is an opaque
1100 # quoted string value provided by the client and used by both client
1101 # and server to avoid chosen plaintext attacks, to provide mutual
1102 # authentication, and to provide some message integrity protection.
1103 # This isn't a fabulous effort, but it's probably Good Enough.
1104 s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001105 b = s.encode("ascii") + _randombytes(8)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001106 dig = hashlib.sha1(b).hexdigest()
1107 return dig[:16]
1108
1109 def get_authorization(self, req, chal):
1110 try:
1111 realm = chal['realm']
1112 nonce = chal['nonce']
1113 qop = chal.get('qop')
1114 algorithm = chal.get('algorithm', 'MD5')
1115 # mod_digest doesn't send an opaque, even though it isn't
1116 # supposed to be optional
1117 opaque = chal.get('opaque', None)
1118 except KeyError:
1119 return None
1120
1121 H, KD = self.get_algorithm_impls(algorithm)
1122 if H is None:
1123 return None
1124
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001125 user, pw = self.passwd.find_user_password(realm, req.full_url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001126 if user is None:
1127 return None
1128
1129 # XXX not implemented yet
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001130 if req.data is not None:
1131 entdig = self.get_entity_digest(req.data, chal)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001132 else:
1133 entdig = None
1134
1135 A1 = "%s:%s:%s" % (user, realm, pw)
1136 A2 = "%s:%s" % (req.get_method(),
1137 # XXX selector: what about proxies and full urls
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001138 req.selector)
PypeBros14a89c42019-11-23 00:19:08 +01001139 # NOTE: As per RFC 2617, when server sends "auth,auth-int", the client could use either `auth`
1140 # or `auth-int` to the response back. we use `auth` to send the response back.
1141 if 'auth' in qop.split(','):
Senthil Kumaran4c7eaee2009-11-15 08:43:45 +00001142 if nonce == self.last_nonce:
1143 self.nonce_count += 1
1144 else:
1145 self.nonce_count = 1
1146 self.last_nonce = nonce
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001147 ncvalue = '%08x' % self.nonce_count
1148 cnonce = self.get_cnonce(nonce)
PypeBros14a89c42019-11-23 00:19:08 +01001149 noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001150 respdig = KD(H(A1), noncebit)
1151 elif qop is None:
1152 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
1153 else:
1154 # XXX handle auth-int.
Georg Brandl13e89462008-07-01 19:56:00 +00001155 raise URLError("qop '%s' is not supported." % qop)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001156
1157 # XXX should the partial digests be encoded too?
1158
1159 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +00001160 'response="%s"' % (user, realm, nonce, req.selector,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001161 respdig)
1162 if opaque:
1163 base += ', opaque="%s"' % opaque
1164 if entdig:
1165 base += ', digest="%s"' % entdig
1166 base += ', algorithm="%s"' % algorithm
1167 if qop:
1168 base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
1169 return base
1170
1171 def get_algorithm_impls(self, algorithm):
1172 # lambdas assume digest modules are imported at the top level
1173 if algorithm == 'MD5':
1174 H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
1175 elif algorithm == 'SHA':
1176 H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
1177 # XXX MD5-sess
Berker Peksage88dd1c2016-03-06 16:16:40 +02001178 else:
1179 raise ValueError("Unsupported digest authentication "
1180 "algorithm %r" % algorithm)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001181 KD = lambda s, d: H("%s:%s" % (s, d))
1182 return H, KD
1183
1184 def get_entity_digest(self, data, chal):
1185 # XXX not implemented yet
1186 return None
1187
1188
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 via the shared Digest logic, then reset the retry count."""
        authority = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed(
            'www-authenticate', authority, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return response
1205
1206
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using HTTP Digest authentication."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 via the shared Digest logic, then reset the retry count."""
        response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return response
1218
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP and HTTPS handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in default headers on *request* and return it.

        Raises URLError when the request has no host and TypeError when
        the request body is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        body = request.data
        if body is not None:  # POST
            if isinstance(body, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            framing_given = (request.has_header('Content-length')
                             or request.has_header('Transfer-encoding'))
            if not framing_given:
                # Length unknown (None) means the body is sent chunked.
                content_length = self._get_content_length(request)
                if content_length is None:
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')
                else:
                    request.add_unredirected_header(
                        'Content-length', str(content_length))

        # Through a proxy the selector is a full URL, so the Host header
        # comes from the selector rather than from the proxy's host.
        sel_host = host
        if request.has_proxy():
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)

        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers win over regular ones with the same name.
        headers = dict(req.unredirected_hdrs)
        for key, value in req.headers.items():
            headers.setdefault(key, value)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                # Proxy-Authorization should not be sent to origin
                # server.
                tunnel_headers[proxy_auth_hdr] = headers.pop(proxy_auth_hdr)
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Close the connection on any failure, then propagate it.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # urllib clients expect the reason phrase in .msg, so it is
        # overwritten here; the headers remain available through info()
        # or .headers.
        r.msg = r.reason
        return r
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001335
1336
class HTTPHandler(AbstractHTTPHandler):
    """Open ``http`` URLs with ``http.client.HTTPConnection``."""

    def http_open(self, req):
        """Send *req* over a plain HTTP connection and return the response."""
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)

    # Request pre-processing is the shared AbstractHTTPHandler logic.
    http_request = AbstractHTTPHandler.do_request_
1343
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open ``https`` URLs with ``http.client.HTTPSConnection``.

        Only defined when http.client provides HTTPSConnection.
        """

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* over HTTPS, passing on the stored TLS settings."""
            tls_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **tls_options)

        # Request pre-processing is the shared AbstractHTTPHandler logic.
        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001360
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and harvest them from responses."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        # Fall back to a fresh in-memory jar when none is supplied.
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to *request*; return the request."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return the response."""
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1378
class UnknownHandler(BaseHandler):
    """Fallback handler that rejects URL types nobody else handled."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001383
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An element without
    '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes, so an empty value ("key=") is
        # accepted instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1393
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    current = []          # characters of the element being collected
    in_quotes = False
    escaped = False       # previous character was a backslash inside quotes

    for ch in s:
        if escaped:
            # Escaped character is kept; the backslash itself was dropped.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1436
class FileHandler(BaseHandler):
    """Open ``file`` URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a local file, or reject a URL that names a foreign host."""
        url = req.selector
        names_remote_host = (url[:2] == '//' and url[2:3] != '/'
                             and (req.host and req.host != 'localhost'))
        if not names_remote_host:
            return self.open_local_file(req)
        if not req.host in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")

    # names for the localhost
    names = None

    def get_names(self):
        """Return the addresses of this host, computed once per process."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        Raises URLError when the file cannot be accessed or when the
        request's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            header_text = (
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mimetypes.guess_type(filename)[0] or 'text/plain',
                 stats.st_size,
                 email.utils.formatdate(stats.st_mtime, usegmt=True)))
            headers = email.message_from_string(header_text)
            if host:
                host, port = _splitport(host)
            if not host or \
                    (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001487
1488def _safe_gethostbyname(host):
1489 try:
1490 return socket.gethostbyname(host)
1491 except socket.gaierror:
1492 return None
1493
class FTPHandler(BaseHandler):
    """Handler for ``ftp:`` URLs built on top of ftpwrapper."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req* over FTP.

        ``req.host`` may carry ``user:password@`` credentials and a port;
        ``req.selector`` may end in ``;type=X`` attributes where X is one
        of a, i or d (either case) to select the transfer type.

        Returns an addinfourl whose headers hold Content-type (when it can
        be guessed from the URL) and Content-length (when it is known).

        Raises URLError when no host is given, when the host cannot be
        resolved, or when any of ftplib.all_errors occurs.
        """
        import ftplib
        import mimetypes

        netloc = req.host
        if not netloc:
            raise URLError('ftp error: no host given')
        netloc, port = _splitport(netloc)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, hostname = _splituser(netloc)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        hostname = unquote(hostname)
        user = user or ''
        passwd = passwd or ''

        try:
            address = socket.gethostbyname(hostname)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = _splitattr(req.selector)
        # Everything before the final '/' is the directory chain, the
        # remainder (possibly empty) is the file name.
        *dirs, file = [unquote(piece) for piece in path.split('/')]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, address, port, dirs,
                                  req.timeout)
            # Binary transfer for files, directory listing when the path
            # has no file component; a ;type= attribute overrides this.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                key, value = _splitvalue(attr)
                if (key.lower() == 'type'
                        and value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(exp.__traceback__)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Create a new, non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001551
class CacheFTPHandler(FTPHandler):
    """FTPHandler variant that reuses ftpwrapper connections.

    Connections are cached per (user, host, port, directory path, timeout)
    key.  Each entry expires ``delay`` seconds after it was last requested,
    and one entry is evicted whenever the cache holds exactly ``max_conns``
    connections.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> expiry time (seconds since the epoch)
        self.soonest = 0     # earliest expiry time currently in self.timeout
        self.delay = 60      # lifetime of an entry in seconds
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the lifetime, in seconds, given to entries on each request."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which check_cache() evicts an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached ftpwrapper for this endpoint, creating it if needed.

        The entry's expiry time is refreshed on every call, after which the
        cache is pruned by check_cache().
        """
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # default=0 keeps this from raising ValueError when every entry has
        # just expired; 0 also forces a fresh scan on the next call.
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection, as the expiry path does,
                    # instead of silently dropping the reference.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1604
class DataHandler(BaseHandler):
    """Handler for ``data:`` URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode the payload embedded in ``req.full_url``.

        Any POSTed data is ignored.  Returns an addinfourl over the decoded
        bytes with Content-type and Content-length headers.  ValueError
        propagates from the splits when the URL has no ':' or no ','.
        """
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        full_url = req.full_url

        _scheme, remainder = full_url.split(":", 1)
        mediatype, encoded = remainder.split(",", 1)

        # Even base64-encoded payloads may be percent-quoted, so unquote
        # unconditionally before looking at the encoding marker.
        payload = unquote_to_bytes(encoded)
        marker = ";base64"
        if mediatype.endswith(marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(marker)]

        mediatype = mediatype or "text/plain;charset=US-ASCII"

        header_text = ("Content-type: %s\nContent-length: %d\n"
                       % (mediatype, len(payload)))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, full_url)
1634
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001635
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Path <-> URL helpers: plain quoting everywhere except on Windows, where
# the nturl2path module supplies the conversions.
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL to a file system path.

        OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to the path part of a 'file' URL.

        OS-specific; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url


ftpcache = {}
Senthil Kumarana2a9ddd2017-04-08 23:27:25 -07001656
1657
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001658class URLopener:
1659 """Class to open URLs.
1660 This is a class rather than just a subroutine because we may need
1661 more than one set of global protocol-specific options.
1662 Note -- this is a base class for those who don't want the
1663 automatic handling of errors type 302 (relocated) and 401
1664 (authorization needed)."""
1665
1666 __tempfiles = None
1667
1668 version = "Python-urllib/%s" % __version__
1669
1670 # Constructor
1671 def __init__(self, proxies=None, **x509):
Georg Brandlfcbdbf22012-06-24 19:56:31 +02001672 msg = "%(class)s style of invoking requests is deprecated. " \
Senthil Kumaran38b968b92012-03-14 13:43:53 -07001673 "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
1674 warnings.warn(msg, DeprecationWarning, stacklevel=3)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001675 if proxies is None:
1676 proxies = getproxies()
1677 assert hasattr(proxies, 'keys'), "proxies must be a mapping"
1678 self.proxies = proxies
1679 self.key_file = x509.get('key_file')
1680 self.cert_file = x509.get('cert_file')
Raymond Hettingerb7f3c942016-09-09 16:44:53 -07001681 self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001682 self.__tempfiles = []
1683 self.__unlink = os.unlink # See cleanup()
1684 self.tempcache = None
1685 # Undocumented feature: if you assign {} to tempcache,
1686 # it is used to cache files retrieved with
1687 # self.retrieve(). This is not enabled by default
1688 # since it does not work for changing documents (and I
1689 # haven't got the logic to check expiration headers
1690 # yet).
1691 self.ftpcache = ftpcache
1692 # Undocumented feature: you can use a different
1693 # ftp cache by assigning to the .ftpcache member;
1694 # in case you want logically independent URL openers
1695 # XXX This is not threadsafe. Bah.
1696
1697 def __del__(self):
1698 self.close()
1699
1700 def close(self):
1701 self.cleanup()
1702
1703 def cleanup(self):
1704 # This code sometimes runs when the rest of this module
1705 # has already been deleted, so it can't use any globals
1706 # or import anything.
1707 if self.__tempfiles:
1708 for file in self.__tempfiles:
1709 try:
1710 self.__unlink(file)
1711 except OSError:
1712 pass
1713 del self.__tempfiles[:]
1714 if self.tempcache:
1715 self.tempcache.clear()
1716
1717 def addheader(self, *args):
1718 """Add a header to be used by the HTTP interface only
1719 e.g. u.addheader('Accept', 'sound/basic')"""
1720 self.addheaders.append(args)
1721
1722 # External interface
1723 def open(self, fullurl, data=None):
1724 """Use URLopener().open(file) instead of open(file, 'r')."""
Rémi Lapeyre674ee122019-05-27 15:43:45 +02001725 fullurl = unwrap(_to_bytes(fullurl))
Senthil Kumaran734f0592010-02-20 22:19:04 +00001726 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001727 if self.tempcache and fullurl in self.tempcache:
1728 filename, headers = self.tempcache[fullurl]
1729 fp = open(filename, 'rb')
Georg Brandl13e89462008-07-01 19:56:00 +00001730 return addinfourl(fp, headers, fullurl)
Cheryl Sabella0250de42018-04-25 16:51:54 -07001731 urltype, url = _splittype(fullurl)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001732 if not urltype:
1733 urltype = 'file'
1734 if urltype in self.proxies:
1735 proxy = self.proxies[urltype]
Cheryl Sabella0250de42018-04-25 16:51:54 -07001736 urltype, proxyhost = _splittype(proxy)
1737 host, selector = _splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001738 url = (host, fullurl) # Signal special case to open_*()
1739 else:
1740 proxy = None
1741 name = 'open_' + urltype
1742 self.type = urltype
1743 name = name.replace('-', '_')
Victor Stinner0c2b6a32019-05-22 22:15:01 +02001744 if not hasattr(self, name) or name == 'open_local_file':
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001745 if proxy:
1746 return self.open_unknown_proxy(proxy, fullurl, data)
1747 else:
1748 return self.open_unknown(fullurl, data)
1749 try:
1750 if data is None:
1751 return getattr(self, name)(url)
1752 else:
1753 return getattr(self, name)(url, data)
Senthil Kumaranf5776862012-10-21 13:30:02 -07001754 except (HTTPError, URLError):
Antoine Pitrou6b4883d2011-10-12 02:54:14 +02001755 raise
Andrew Svetlov0832af62012-12-18 23:10:48 +02001756 except OSError as msg:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001757 raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001758
1759 def open_unknown(self, fullurl, data=None):
1760 """Overridable interface to open unknown URL type."""
Cheryl Sabella0250de42018-04-25 16:51:54 -07001761 type, url = _splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001762 raise OSError('url error', 'unknown url type', type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001763
1764 def open_unknown_proxy(self, proxy, fullurl, data=None):
1765 """Overridable interface to open unknown URL type."""
Cheryl Sabella0250de42018-04-25 16:51:54 -07001766 type, url = _splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001767 raise OSError('url error', 'invalid proxy for %s' % type, proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001768
1769 # External interface
1770 def retrieve(self, url, filename=None, reporthook=None, data=None):
1771 """retrieve(url) returns (filename, headers) for a local object
1772 or (tempfilename, headers) for a remote object."""
Rémi Lapeyre674ee122019-05-27 15:43:45 +02001773 url = unwrap(_to_bytes(url))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001774 if self.tempcache and url in self.tempcache:
1775 return self.tempcache[url]
Cheryl Sabella0250de42018-04-25 16:51:54 -07001776 type, url1 = _splittype(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001777 if filename is None and (not type or type == 'file'):
1778 try:
1779 fp = self.open_local_file(url1)
1780 hdrs = fp.info()
Philip Jenveycb134d72009-12-03 02:45:01 +00001781 fp.close()
Xtreakc661b302019-05-19 19:10:06 +05301782 return url2pathname(_splithost(url1)[1]), hdrs
Pablo Galindo293dd232019-11-19 21:34:03 +00001783 except OSError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001784 pass
1785 fp = self.open(url, data)
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001786 try:
1787 headers = fp.info()
1788 if filename:
1789 tfp = open(filename, 'wb')
1790 else:
Xtreakc661b302019-05-19 19:10:06 +05301791 garbage, path = _splittype(url)
1792 garbage, path = _splithost(path or "")
1793 path, garbage = _splitquery(path or "")
1794 path, garbage = _splitattr(path or "")
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001795 suffix = os.path.splitext(path)[1]
1796 (fd, filename) = tempfile.mkstemp(suffix)
1797 self.__tempfiles.append(filename)
1798 tfp = os.fdopen(fd, 'wb')
1799 try:
1800 result = filename, headers
1801 if self.tempcache is not None:
1802 self.tempcache[url] = result
1803 bs = 1024*8
1804 size = -1
1805 read = 0
1806 blocknum = 0
Senthil Kumarance260142011-11-01 01:35:17 +08001807 if "content-length" in headers:
1808 size = int(headers["Content-Length"])
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001809 if reporthook:
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001810 reporthook(blocknum, bs, size)
1811 while 1:
1812 block = fp.read(bs)
1813 if not block:
1814 break
1815 read += len(block)
1816 tfp.write(block)
1817 blocknum += 1
1818 if reporthook:
1819 reporthook(blocknum, bs, size)
1820 finally:
1821 tfp.close()
1822 finally:
1823 fp.close()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001824
1825 # raise exception if actual size does not match content-length header
1826 if size >= 0 and read < size:
Georg Brandl13e89462008-07-01 19:56:00 +00001827 raise ContentTooShortError(
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001828 "retrieval incomplete: got only %i out of %i bytes"
1829 % (read, size), result)
1830
1831 return result
1832
1833 # Each method named open_<type> knows how to open that type of URL
1834
1835 def _open_generic_http(self, connection_factory, url, data):
1836 """Make an HTTP connection using connection_class.
1837
1838 This is an internal method that should be called from
1839 open_http() or open_https().
1840
1841 Arguments:
1842 - connection_factory should take a host name and return an
1843 HTTPConnection instance.
1844 - url is the url to retrieval or a host, relative-path pair.
1845 - data is payload for a POST request or None.
1846 """
1847
1848 user_passwd = None
1849 proxy_passwd= None
1850 if isinstance(url, str):
Cheryl Sabella0250de42018-04-25 16:51:54 -07001851 host, selector = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001852 if host:
Cheryl Sabella0250de42018-04-25 16:51:54 -07001853 user_passwd, host = _splituser(host)
Georg Brandl13e89462008-07-01 19:56:00 +00001854 host = unquote(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001855 realhost = host
1856 else:
1857 host, selector = url
1858 # check whether the proxy contains authorization information
Cheryl Sabella0250de42018-04-25 16:51:54 -07001859 proxy_passwd, host = _splituser(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001860 # now we proceed with the url we want to obtain
Cheryl Sabella0250de42018-04-25 16:51:54 -07001861 urltype, rest = _splittype(selector)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001862 url = rest
1863 user_passwd = None
1864 if urltype.lower() != 'http':
1865 realhost = None
1866 else:
Cheryl Sabella0250de42018-04-25 16:51:54 -07001867 realhost, rest = _splithost(rest)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001868 if realhost:
Cheryl Sabella0250de42018-04-25 16:51:54 -07001869 user_passwd, realhost = _splituser(realhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001870 if user_passwd:
1871 selector = "%s://%s%s" % (urltype, realhost, rest)
1872 if proxy_bypass(realhost):
1873 host = realhost
1874
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001875 if not host: raise OSError('http error', 'no host given')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001876
1877 if proxy_passwd:
Senthil Kumaranc5c5a142012-01-14 19:09:04 +08001878 proxy_passwd = unquote(proxy_passwd)
Senthil Kumaran5626eec2010-08-04 17:46:23 +00001879 proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001880 else:
1881 proxy_auth = None
1882
1883 if user_passwd:
Senthil Kumaranc5c5a142012-01-14 19:09:04 +08001884 user_passwd = unquote(user_passwd)
Senthil Kumaran5626eec2010-08-04 17:46:23 +00001885 auth = base64.b64encode(user_passwd.encode()).decode('ascii')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001886 else:
1887 auth = None
1888 http_conn = connection_factory(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001889 headers = {}
1890 if proxy_auth:
1891 headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
1892 if auth:
1893 headers["Authorization"] = "Basic %s" % auth
1894 if realhost:
1895 headers["Host"] = realhost
Senthil Kumarand91ffca2011-03-19 17:25:27 +08001896
1897 # Add Connection:close as we don't support persistent connections yet.
1898 # This helps in closing the socket and avoiding ResourceWarning
1899
1900 headers["Connection"] = "close"
1901
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001902 for header, value in self.addheaders:
1903 headers[header] = value
1904
1905 if data is not None:
1906 headers["Content-Type"] = "application/x-www-form-urlencoded"
1907 http_conn.request("POST", selector, data, headers)
1908 else:
1909 http_conn.request("GET", selector, headers=headers)
1910
1911 try:
1912 response = http_conn.getresponse()
1913 except http.client.BadStatusLine:
1914 # something went wrong with the HTTP status line
Georg Brandl13e89462008-07-01 19:56:00 +00001915 raise URLError("http protocol error: bad status line")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001916
1917 # According to RFC 2616, "2xx" code indicates that the client's
1918 # request was successfully received, understood, and accepted.
1919 if 200 <= response.status < 300:
Antoine Pitroub353c122009-02-11 00:39:14 +00001920 return addinfourl(response, response.msg, "http:" + url,
Georg Brandl13e89462008-07-01 19:56:00 +00001921 response.status)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001922 else:
1923 return self.http_error(
1924 url, response.fp,
1925 response.status, response.reason, response.msg, data)
1926
1927 def open_http(self, url, data=None):
1928 """Use HTTP protocol."""
1929 return self._open_generic_http(http.client.HTTPConnection, url, data)
1930
1931 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1932 """Handle http errors.
1933
1934 Derived class can override this, or provide specific handlers
1935 named http_error_DDD where DDD is the 3-digit error code."""
1936 # First check if there's a specific handler for this error
1937 name = 'http_error_%d' % errcode
1938 if hasattr(self, name):
1939 method = getattr(self, name)
1940 if data is None:
1941 result = method(url, fp, errcode, errmsg, headers)
1942 else:
1943 result = method(url, fp, errcode, errmsg, headers, data)
1944 if result: return result
1945 return self.http_error_default(url, fp, errcode, errmsg, headers)
1946
1947 def http_error_default(self, url, fp, errcode, errmsg, headers):
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001948 """Default error handler: close the connection and raise OSError."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001949 fp.close()
Georg Brandl13e89462008-07-01 19:56:00 +00001950 raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001951
1952 if _have_ssl:
1953 def _https_connection(self, host):
1954 return http.client.HTTPSConnection(host,
1955 key_file=self.key_file,
1956 cert_file=self.cert_file)
1957
1958 def open_https(self, url, data=None):
1959 """Use HTTPS protocol."""
1960 return self._open_generic_http(self._https_connection, url, data)
1961
1962 def open_file(self, url):
1963 """Use local file or FTP depending on form of URL."""
1964 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07001965 raise URLError('file error: proxy support for file protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001966 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Senthil Kumaran383c32d2010-10-14 11:57:35 +00001967 raise ValueError("file:// scheme is supported only on localhost")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001968 else:
1969 return self.open_local_file(url)
1970
1971 def open_local_file(self, url):
1972 """Use local file."""
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001973 import email.utils
1974 import mimetypes
Cheryl Sabella0250de42018-04-25 16:51:54 -07001975 host, file = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001976 localname = url2pathname(file)
1977 try:
1978 stats = os.stat(localname)
1979 except OSError as e:
Senthil Kumaranf5776862012-10-21 13:30:02 -07001980 raise URLError(e.strerror, e.filename)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001981 size = stats.st_size
1982 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
1983 mtype = mimetypes.guess_type(url)[0]
1984 headers = email.message_from_string(
1985 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
1986 (mtype or 'text/plain', size, modified))
1987 if not host:
1988 urlfile = file
1989 if file[:1] == '/':
1990 urlfile = 'file://' + file
Georg Brandl13e89462008-07-01 19:56:00 +00001991 return addinfourl(open(localname, 'rb'), headers, urlfile)
Cheryl Sabella0250de42018-04-25 16:51:54 -07001992 host, port = _splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001993 if (not port
Senthil Kumaran40d80782012-10-22 09:43:04 -07001994 and socket.gethostbyname(host) in ((localhost(),) + thishost())):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001995 urlfile = file
1996 if file[:1] == '/':
1997 urlfile = 'file://' + file
Senthil Kumaran3800ea92012-01-21 11:52:48 +08001998 elif file[:2] == './':
1999 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
Georg Brandl13e89462008-07-01 19:56:00 +00002000 return addinfourl(open(localname, 'rb'), headers, urlfile)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002001 raise URLError('local file error: not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002002
2003 def open_ftp(self, url):
2004 """Use FTP protocol."""
2005 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002006 raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002007 import mimetypes
Cheryl Sabella0250de42018-04-25 16:51:54 -07002008 host, path = _splithost(url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002009 if not host: raise URLError('ftp error: no host given')
Cheryl Sabella0250de42018-04-25 16:51:54 -07002010 host, port = _splitport(host)
2011 user, host = _splituser(host)
2012 if user: user, passwd = _splitpasswd(user)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002013 else: passwd = None
Georg Brandl13e89462008-07-01 19:56:00 +00002014 host = unquote(host)
2015 user = unquote(user or '')
2016 passwd = unquote(passwd or '')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002017 host = socket.gethostbyname(host)
2018 if not port:
2019 import ftplib
2020 port = ftplib.FTP_PORT
2021 else:
2022 port = int(port)
Cheryl Sabella0250de42018-04-25 16:51:54 -07002023 path, attrs = _splitattr(path)
Georg Brandl13e89462008-07-01 19:56:00 +00002024 path = unquote(path)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002025 dirs = path.split('/')
2026 dirs, file = dirs[:-1], dirs[-1]
2027 if dirs and not dirs[0]: dirs = dirs[1:]
2028 if dirs and not dirs[0]: dirs[0] = '/'
2029 key = user, host, port, '/'.join(dirs)
2030 # XXX thread unsafe!
2031 if len(self.ftpcache) > MAXFTPCACHE:
2032 # Prune the cache, rather arbitrarily
Benjamin Peterson3c2dca62014-06-07 15:08:04 -07002033 for k in list(self.ftpcache):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002034 if k != key:
2035 v = self.ftpcache[k]
2036 del self.ftpcache[k]
2037 v.close()
2038 try:
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002039 if key not in self.ftpcache:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002040 self.ftpcache[key] = \
2041 ftpwrapper(user, passwd, host, port, dirs)
2042 if not file: type = 'D'
2043 else: type = 'I'
2044 for attr in attrs:
Cheryl Sabella0250de42018-04-25 16:51:54 -07002045 attr, value = _splitvalue(attr)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002046 if attr.lower() == 'type' and \
2047 value in ('a', 'A', 'i', 'I', 'd', 'D'):
2048 type = value.upper()
2049 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
2050 mtype = mimetypes.guess_type("ftp:" + url)[0]
2051 headers = ""
2052 if mtype:
2053 headers += "Content-Type: %s\n" % mtype
2054 if retrlen is not None and retrlen >= 0:
2055 headers += "Content-Length: %d\n" % retrlen
2056 headers = email.message_from_string(headers)
Georg Brandl13e89462008-07-01 19:56:00 +00002057 return addinfourl(fp, headers, "ftp:" + url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002058 except ftperrors() as exp:
2059 raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002060
2061 def open_data(self, url, data=None):
2062 """Use "data" URL."""
2063 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002064 raise URLError('data error: proxy support for data protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002065 # ignore POSTed data
2066 #
2067 # syntax of data URLs:
2068 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2069 # mediatype := [ type "/" subtype ] *( ";" parameter )
2070 # data := *urlchar
2071 # parameter := attribute "=" value
2072 try:
2073 [type, data] = url.split(',', 1)
2074 except ValueError:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002075 raise OSError('data error', 'bad data URL')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002076 if not type:
2077 type = 'text/plain;charset=US-ASCII'
2078 semi = type.rfind(';')
2079 if semi >= 0 and '=' not in type[semi:]:
2080 encoding = type[semi+1:]
2081 type = type[:semi]
2082 else:
2083 encoding = ''
2084 msg = []
Senthil Kumaranf6c456d2010-05-01 08:29:18 +00002085 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002086 time.gmtime(time.time())))
2087 msg.append('Content-type: %s' % type)
2088 if encoding == 'base64':
Georg Brandl706824f2009-06-04 09:42:55 +00002089 # XXX is this encoding/decoding ok?
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00002090 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002091 else:
Georg Brandl13e89462008-07-01 19:56:00 +00002092 data = unquote(data)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002093 msg.append('Content-Length: %d' % len(data))
2094 msg.append('')
2095 msg.append(data)
2096 msg = '\n'.join(msg)
Georg Brandl13e89462008-07-01 19:56:00 +00002097 headers = email.message_from_string(msg)
2098 f = io.StringIO(msg)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002099 #f.fileno = None # needed for addinfourl
Georg Brandl13e89462008-07-01 19:56:00 +00002100 return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002101
2102
2103class FancyURLopener(URLopener):
2104 """Derived class with handlers for errors we can handle (perhaps)."""
2105
2106 def __init__(self, *args, **kwargs):
2107 URLopener.__init__(self, *args, **kwargs)
2108 self.auth_cache = {}
2109 self.tries = 0
2110 self.maxtries = 10
2111
2112 def http_error_default(self, url, fp, errcode, errmsg, headers):
2113 """Default error handling -- don't raise an exception."""
Georg Brandl13e89462008-07-01 19:56:00 +00002114 return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002115
2116 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2117 """Error 302 -- relocated (temporarily)."""
2118 self.tries += 1
Martin Pantera0370222016-02-04 06:01:35 +00002119 try:
2120 if self.maxtries and self.tries >= self.maxtries:
2121 if hasattr(self, "http_error_500"):
2122 meth = self.http_error_500
2123 else:
2124 meth = self.http_error_default
2125 return meth(url, fp, 500,
2126 "Internal Server Error: Redirect Recursion",
2127 headers)
2128 result = self.redirect_internal(url, fp, errcode, errmsg,
2129 headers, data)
2130 return result
2131 finally:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002132 self.tries = 0
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002133
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    """Open the redirect target named by the reply headers.

    The target is taken from the 'location' header, falling back to 'uri';
    when neither is present the method returns None without touching *fp*.
    Otherwise *fp* is closed, the target is resolved against
    self.type + ":" + url (so relative targets work) and opened with
    self.open().  *data* is not used.

    Raises HTTPError when the resolved target's scheme is anything other
    than http, https, ftp or empty; this restriction exists for security
    reasons.
    """
    for field in ('location', 'uri'):
        if field in headers:
            newurl = headers[field]
            break
    else:
        return
    fp.close()

    # The server may have sent a relative URL; join it with the original.
    newurl = urljoin(self.type + ":" + url, newurl)

    scheme = urlparse(newurl).scheme
    if scheme in ('http', 'https', 'ftp', ''):
        return self.open(newurl)

    # Refuse to follow redirects to any other scheme.
    raise HTTPError(newurl, errcode,
                    errmsg +
                    " Redirection to url '%s' is not allowed." % newurl,
                    headers, fp)
2161
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle a 301 (permanently relocated) reply exactly like a 302."""
    follow = self.http_error_302
    return follow(url, fp, errcode, errmsg, headers, data)
2165
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle a 303 reply; it is processed by the 302 handler unchanged."""
    follow = self.http_error_302
    return follow(url, fp, errcode, errmsg, headers, data)
2169
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle a 307 reply: follow it unless the request carried data.

    A request with *data* (i.e. a POST) is not redirected; it is passed to
    self.http_error_default instead.  Requests without data are handled by
    the 302 handler.
    """
    if data is not None:
        return self.http_error_default(url, fp, errcode, errmsg, headers)
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2176
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                   retry=False):
    """Handle a 401 (authentication required) reply.

    Only the Basic scheme is supported.  The 'www-authenticate' header must
    be present, must parse as ``<scheme> realm="<realm>"`` and must name the
    basic scheme, and *retry* must be true; otherwise
    URLopener.http_error_default is invoked.
    NOTE(review): the statements following each of those calls assume the
    call does not return normally (presumably it raises) -- confirm against
    URLopener.http_error_default.

    On success the request is retried through the method named
    'retry_<self.type>_basic_auth', passing *data* only when it is not None.
    """
    if 'www-authenticate' not in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    challenge = headers['www-authenticate']
    parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not parsed:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    scheme, realm = parsed.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    if not retry:
        URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                     headers)
    retry_method = getattr(self, 'retry_' + self.type + '_basic_auth')
    call_args = (url, realm) if data is None else (url, realm, data)
    return retry_method(*call_args)
2201
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                   retry=False):
    """Handle a 407 (proxy authentication required) reply.

    Only the Basic scheme is supported.  The 'proxy-authenticate' header
    must be present, must parse as ``<scheme> realm="<realm>"`` and must
    name the basic scheme, and *retry* must be true; otherwise
    URLopener.http_error_default is invoked.
    NOTE(review): the statements following each of those calls assume the
    call does not return normally (presumably it raises) -- confirm against
    URLopener.http_error_default.

    On success the request is retried through the method named
    'retry_proxy_<self.type>_basic_auth', passing *data* only when it is
    not None.
    """
    if 'proxy-authenticate' not in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    challenge = headers['proxy-authenticate']
    parsed = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
    if not parsed:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    scheme, realm = parsed.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    if not retry:
        URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                     headers)
    retry_method = getattr(self, 'retry_proxy_' + self.type + '_basic_auth')
    call_args = (url, realm) if data is None else (url, realm, data)
    return retry_method(*call_args)
2226
def retry_proxy_http_basic_auth(self, url, realm, data=None):
    """Retry an http request after adding credentials to the http proxy.

    Any user-info already present in self.proxies['http'] is dropped, and
    credentials for the proxy host and *realm* are obtained from
    self.get_user_passwd (the length of the dropped user-info is passed as
    its clear_cache argument).  Returns None when neither a user nor a
    password is supplied; otherwise self.proxies['http'] is rewritten with
    the percent-quoted credentials and the original URL is reopened.
    """
    target_host, target_selector = _splithost(url)
    newurl = 'http://' + target_host + target_selector
    _scheme, proxy_rest = _splittype(self.proxies['http'])
    proxy_netloc, proxy_selector = _splithost(proxy_rest)
    # Length of a leading "user:pass@" part (0 when there is none).
    userinfo_len = proxy_netloc.find('@') + 1
    proxy_netloc = proxy_netloc[userinfo_len:]
    user, passwd = self.get_user_passwd(proxy_netloc, realm, userinfo_len)
    if not user and not passwd:
        return None
    proxy_netloc = (quote(user, safe='') + ':' + quote(passwd, safe='')
                    + '@' + proxy_netloc)
    self.proxies['http'] = 'http://' + proxy_netloc + proxy_selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2244
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    """Retry an https request after adding credentials to the https proxy.

    Any user-info already present in self.proxies['https'] is dropped, and
    credentials for the proxy host and *realm* are obtained from
    self.get_user_passwd (the length of the dropped user-info is passed as
    its clear_cache argument).  Returns None when neither a user nor a
    password is supplied; otherwise self.proxies['https'] is rewritten with
    the percent-quoted credentials and the original URL is reopened.
    """
    target_host, target_selector = _splithost(url)
    newurl = 'https://' + target_host + target_selector
    _scheme, proxy_rest = _splittype(self.proxies['https'])
    proxy_netloc, proxy_selector = _splithost(proxy_rest)
    # Length of a leading "user:pass@" part (0 when there is none).
    userinfo_len = proxy_netloc.find('@') + 1
    proxy_netloc = proxy_netloc[userinfo_len:]
    user, passwd = self.get_user_passwd(proxy_netloc, realm, userinfo_len)
    if not user and not passwd:
        return None
    proxy_netloc = (quote(user, safe='') + ':' + quote(passwd, safe='')
                    + '@' + proxy_netloc)
    self.proxies['https'] = 'https://' + proxy_netloc + proxy_selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2262
def retry_http_basic_auth(self, url, realm, data=None):
    """Retry an http request with Basic credentials embedded in the URL.

    Any user-info already present in the host part of *url* is dropped, and
    credentials for the host and *realm* are obtained from
    self.get_user_passwd (the length of the dropped user-info is passed as
    its clear_cache argument).  Returns None when neither a user nor a
    password is supplied; otherwise the URL is rebuilt with the
    percent-quoted credentials and opened, with *data* when given.
    """
    netloc, selector = _splithost(url)
    # Length of a leading "user:pass@" part (0 when there is none).
    userinfo_len = netloc.find('@') + 1
    netloc = netloc[userinfo_len:]
    user, passwd = self.get_user_passwd(netloc, realm, userinfo_len)
    if not user and not passwd:
        return None
    netloc = (quote(user, safe='') + ':' + quote(passwd, safe='')
              + '@' + netloc)
    newurl = 'http://' + netloc + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2276
def retry_https_basic_auth(self, url, realm, data=None):
    """Retry an https request with Basic credentials embedded in the URL.

    Any user-info already present in the host part of *url* is dropped, and
    credentials for the host and *realm* are obtained from
    self.get_user_passwd (the length of the dropped user-info is passed as
    its clear_cache argument).  Returns None when neither a user nor a
    password is supplied; otherwise the URL is rebuilt with the
    percent-quoted credentials and opened, with *data* when given.
    """
    netloc, selector = _splithost(url)
    # Length of a leading "user:pass@" part (0 when there is none).
    userinfo_len = netloc.find('@') + 1
    netloc = netloc[userinfo_len:]
    user, passwd = self.get_user_passwd(netloc, realm, userinfo_len)
    if not user and not passwd:
        return None
    netloc = (quote(user, safe='') + ':' + quote(passwd, safe='')
              + '@' + netloc)
    newurl = 'https://' + netloc + selector
    if data is not None:
        return self.open(newurl, data)
    return self.open(newurl)
2290
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return (user, passwd) for *realm* at *host*, using self.auth_cache.

    The cache key is realm + '@' + lower-cased host.  A cached pair is
    returned directly unless *clear_cache* is true, in which case the entry
    is discarded first.  Without a usable cached entry the pair comes from
    self.prompt_user_passwd and is cached when either element is truthy.
    """
    cache_key = realm + '@' + host.lower()
    if cache_key in self.auth_cache:
        if not clear_cache:
            return self.auth_cache[cache_key]
        del self.auth_cache[cache_key]
    user, passwd = self.prompt_user_passwd(host, realm)
    if user or passwd:
        self.auth_cache[cache_key] = (user, passwd)
    return user, passwd
2301
def prompt_user_passwd(self, host, realm):
    """Ask on the console for credentials; override in a GUI environment.

    Returns (user, passwd) as typed, or (None, None) when the prompt is
    interrupted with KeyboardInterrupt (a blank line is printed first).
    """
    import getpass
    try:
        user = input("Enter username for %s at %s: " % (realm, host))
        passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                 (user, realm, host))
    except KeyboardInterrupt:
        print()
        return None, None
    else:
        return user, passwd
2313
2314
2315# Utility functions
2316
_localhost = None
def localhost():
    """Return the IP address that the hostname 'localhost' resolves to.

    The lookup is performed once; the result is kept in the module-level
    _localhost and reused by later calls.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2324
_thishost = None
def thishost():
    """Return a tuple of the IP addresses of the current host.

    The addresses are those reported for socket.gethostname(); if that
    lookup fails with socket.gaierror, the addresses of 'localhost' are used
    instead.  The tuple is computed once and kept in the module-level
    _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2335
_ftperrors = None
def ftperrors():
    """Return ftplib.all_errors, the errors raised by the FTP class.

    ftplib is imported on first use only; the value is then kept in the
    module-level _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2344
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The object is created once, kept in the module-level _noheaders, and
    the same instance is returned by every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2352
2353
2354# Utility classes
2355
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    One instance owns a single ftplib.FTP control connection.  Data
    transfers handed out by retrfile() are reference-counted so that the
    control connection is only really closed once close() has been called
    and every outstanding transfer file has been closed.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters and connect immediately.

        *dirs* is the sequence of directory names to change into after
        login.  *persistent* sets the initial keepalive flag.  If
        connecting fails, the wrapper is closed and the error re-raised.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0           # transfer files handed out and not yet closed
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Do not leave a half-open connection behind, whatever failed.
            self.close()
            raise

    def init(self):
        """(Re)connect: open the control connection, log in, cwd to dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* and return (file object, length).

        *type* is the FTP transfer type letter; 'd' or 'D' requests a
        directory listing.  A RETR that fails with a 550 reply falls back
        to a directory listing of *file*.  The returned file object calls
        file_close() when closed; the length is whatever ntransfercmd()
        reported.

        Raises URLError (chained from the ftplib error) when RETR is
        refused with a reply other than 550, or when *file* cannot be
        entered as a directory for the listing fallback.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection may have gone stale: reconnect once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    # Chain explicitly, as the directory check below does.
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy with a transfer."""
        self.busy = 0

    def close(self):
        """Drop keepalive; really close once no transfer file is open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for transfer files: release one reference."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the control connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2449
2450# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The environment is scanned for variables named <scheme>_proxy.  To make
    all-lowercase suffixes win, two passes are made: the first accepts the
    suffix in any letter case, the second only a lowercase '_proxy' suffix,
    and a second-pass variable with an empty value removes the scheme.

    When REQUEST_METHOD is set (i.e. running as a CGI script) the 'http'
    entry found by the first pass is discarded, because HTTP_PROXY may have
    been set by the web server from a client "Proxy:" header
    (CVE-2016-1000110); a variable matched by the second pass still applies.
    """
    suffix = '_proxy'
    proxies = {}
    # Pass 1: any letter case, non-empty values only.
    for env_name, env_value in os.environ.items():
        lowered = env_name.lower()
        if env_value and lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = env_value
    # CGI: forget whatever the first pass recorded for http.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Pass 2: lowercase suffix only; these override or clear pass-1 entries.
    for env_name, env_value in os.environ.items():
        if not env_name.endswith(suffix):
            continue
        scheme = env_name.lower()[:-len(suffix)]
        if env_value:
            proxies[scheme] = env_value
        else:
            proxies.pop(scheme, None)
    return proxies
2481
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Looks at proxies['no'] (taken from getproxies_environment() when
    *proxies* is None), which should be a comma separated list of DNS
    suffixes, or '*' for all hosts.  Matching is case-insensitive, ignores
    leading dots on the suffixes, and is tried against *host* both with and
    without its port.  Returns True to bypass the proxy, False otherwise.
    """
    if proxies is None:
        proxies = getproxies_environment()
    # Nothing is bypassed when no_proxy isn't specified.
    try:
        no_proxy = proxies['no']
    except KeyError:
        return False
    # '*' is the special case for "always bypass".
    if no_proxy == '*':
        return True
    host = host.lower()
    hostonly, _port = _splitport(host)
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        suffix = entry.lstrip('.').lower()  # leading dots are ignored
        # Exact match, with or without the port ...
        if suffix in (hostonly, host):
            return True
        # ... or a match on a whole-label DNS suffix.
        dotted = '.' + suffix
        if hostonly.endswith(dotted) or host.endswith(dotted):
            return True
    return False
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002515
2516
Ronald Oussorene72e1612011-03-14 18:15:25 -04002517# This code tests an OSX specific data structure but is testable on all
2518# platforms
2519def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2520 """
2521 Return True iff this host shouldn't be accessed using a proxy
2522
2523 This function uses the MacOSX framework SystemConfiguration
2524 to fetch the proxy information.
2525
2526 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2527 { 'exclude_simple': bool,
2528 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2529 }
2530 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002531 from fnmatch import fnmatch
2532
Cheryl Sabella0250de42018-04-25 16:51:54 -07002533 hostonly, port = _splitport(host)
Ronald Oussorene72e1612011-03-14 18:15:25 -04002534
2535 def ip2num(ipAddr):
2536 parts = ipAddr.split('.')
2537 parts = list(map(int, parts))
2538 if len(parts) != 4:
2539 parts = (parts + [0, 0, 0, 0])[:4]
2540 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2541
2542 # Check for simple host names:
2543 if '.' not in host:
2544 if proxy_settings['exclude_simple']:
2545 return True
2546
2547 hostIP = None
2548
2549 for value in proxy_settings.get('exceptions', ()):
2550 # Items in the list are strings like these: *.local, 169.254/16
2551 if not value: continue
2552
2553 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2554 if m is not None:
2555 if hostIP is None:
2556 try:
2557 hostIP = socket.gethostbyname(hostonly)
2558 hostIP = ip2num(hostIP)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002559 except OSError:
Ronald Oussorene72e1612011-03-14 18:15:25 -04002560 continue
2561
2562 base = ip2num(m.group(1))
2563 mask = m.group(2)
2564 if mask is None:
2565 mask = 8 * (m.group(1).count('.') + 1)
2566 else:
2567 mask = int(mask[1:])
2568 mask = 32 - mask
2569
2570 if (hostIP >> mask) == (base >> mask):
2571 return True
2572
2573 elif fnmatch(host, value):
2574 return True
2575
2576 return False
2577
2578
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True if *host* should bypass the proxy per system settings.

        The settings are fetched with _scproxy._get_proxy_settings() and
        evaluated by _proxy_bypass_macosx_sysconf().
        """
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return proxies from the environment, else from system settings."""
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  An empty dictionary is
        returned when winreg is unavailable, the proxy is disabled, or the
        registry values are missing; if a value is malformed, whatever was
        parsed before the error is returned.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with-statement closes the key even when a query fails.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
            ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('(?:[^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry's ProxyOverride list covers *host*, else 0.

        The override value is a ';'-separated list of glob patterns ('*' and
        '?' are wildcards) plus the special entry '<local>', which matches
        any host name without a dot.  Each pattern must match the whole of
        the host name, its resolved address, or its fully qualified name,
        case-insensitively.  Empty entries are ignored.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            # The with-statement closes the key on every path.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
            ) as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, _port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # now check if we match one of the registry values.
        for test in proxyOverride.split(';'):
            test = test.strip()
            if not test:
                # An empty entry (e.g. from a trailing ';') used to become
                # an empty regex that matched every host.
                continue
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
                continue
            # Escape everything, then re-enable the two glob wildcards.
            pattern = re.escape(test)
            pattern = pattern.replace(r'\*', '.*')  # glob sequence
            pattern = pattern.replace(r'\?', '.')   # glob char
            for val in host:
                # The whole candidate must match, not merely a prefix.
                if re.fullmatch(pattern, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment