blob: 39974d975ee1e0fe87cfbe401302befc7c0e9e96 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020021OSError); for HTTP errors, raises an HTTPError, which can also be
Jeremy Hylton1afc1692008-06-18 20:49:58 +000022treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers. Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
Senthil Kumaran1107c5d2009-11-15 06:20:55 +000033
Senthil Kumaran47fff872009-12-20 07:10:31 +000034OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036
37Request -- An object that encapsulates the state of a request. The
38state can be as simple as the URL. It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
Georg Brandl029986a2008-06-23 11:44:14 +000049import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
51# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000052authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000053authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
Georg Brandl029986a2008-06-23 11:44:14 +000058proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000059
60# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000061opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000063
64# install it
Georg Brandl029986a2008-06-23 11:44:14 +000065urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000066
Georg Brandl029986a2008-06-23 11:44:14 +000067f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000068"""
69
70# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't recognize the hash
# algorithm requested in the challenge, it would be good to pass that
# information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
Martin Pantere6f06092016-05-16 01:14:20 +000094import string
Jeremy Hylton1afc1692008-06-18 20:49:58 +000095import sys
96import time
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
Rémi Lapeyre674ee122019-05-27 15:43:45 +0200104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
Cheryl Sabella0250de42018-04-25 16:51:54 -0700105 _splittype, _splithost, _splitport, _splituser, _splitpasswd,
106 _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# check for SSL
# _have_ssl records whether the ssl module could be imported; urlopen()
# consults it before trying to build an SSL context from cafile/capath.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

# Names exported by "from urllib.request import *".
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent
# ("<major>.<minor>" of the running interpreter)
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener: created lazily by urlopen(), replaced by
# install_opener() and reset to None by urlcleanup().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* (a string or a Request object) and return the response.

    *data* is extra data to send to the server, or None when there is
    nothing to send.  See Request for details.

    The urllib.request module speaks HTTP/1.1 and adds a
    "Connection:close" header to the HTTP requests it sends.

    *timeout*, when given, is a timeout in seconds for blocking operations
    such as the connection attempt; when omitted the global default
    timeout applies.  It only has an effect for HTTP, HTTPS and FTP
    connections.

    *context*, when given, must be an ssl.SSLContext instance describing
    the SSL options to use.  See HTTPSConnection for more details.

    *cafile* and *capath* are deprecated.  They name a set of trusted CA
    certificates for HTTPS requests: cafile is a single file holding a
    bundle of CA certificates and capath is a directory of hashed
    certificate files (see ssl.SSLContext.load_verify_locations()).
    Passing any of cafile, capath or cadefault emits a DeprecationWarning,
    and combining them with *context* raises ValueError.  ValueError is
    also raised when they are used but SSL support is unavailable.

    The value of *cadefault* itself is ignored.

    The returned object always works as a context manager and has the
    properties url, headers, and status; see urllib.response.addinfourl
    for details on these properties.

    For HTTP and HTTPS URLs the result is a slightly modified
    http.client.HTTPResponse object.  In addition to the properties
    above, its msg attribute carries the same information as the reason
    attribute --- the reason phrase returned by the server --- rather
    than the response headers that the HTTPResponse documentation
    describes.

    For FTP, file, and data URLs, and for requests explicitly handled by
    the legacy URLopener and FancyURLopener classes, the result is a
    urllib.response.addinfourl object.

    None may be returned if no handler handles the request, although the
    default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens.

    If proxy settings are detected (for example, when a *_proxy
    environment variable such as http_proxy is set), ProxyHandler is
    installed by default and routes the requests through the proxy.
    """
    global _opener

    uses_legacy_ca_args = bool(cafile or capath or cadefault)
    if uses_legacy_ca_args:
        import warnings
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])

    if uses_legacy_ca_args or context:
        # A one-off opener carrying the caller's (or freshly built) context;
        # the shared default opener is left untouched.
        opener = build_opener(HTTPSHandler(context=context))
    else:
        if _opener is None:
            # Lazily create the shared default opener on first use.
            _opener = build_opener()
        opener = _opener

    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000217
def install_opener(opener):
    """Install *opener* as the module-wide default opener used by urlopen().

    The object is stored as-is in the module global ``_opener``; no
    validation is performed on it here.
    """
    global _opener
    _opener = opener
221
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download the resource at *url* to a file on disk.

    url -- the URL to fetch (required).
    filename -- destination path; when omitted a temporary file is
        created and remembered so urlcleanup() can delete it later.
    reporthook -- optional callable invoked as
        reporthook(block_number, block_size, total_size) once before the
        first read and once after every block written.  total_size is -1
        when the response carries no Content-Length header.
    data -- passed through to urlopen(); should be valid URL encoded data.

    For a file:// URL with no filename nothing is copied: the normalised
    local path is returned directly.  With a filename, the local file is
    copied to the new file.

    Returns a (path, headers) tuple where headers is the object returned
    by the response's info() method.  Raises ContentTooShortError when
    fewer bytes arrive than Content-Length announced.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Local files are returned in place unless a copy was requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: the caller's file or a tracked temp file.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            block_size = 1024*8
            expected = -1
            received = 0
            block_count = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])

            if reporthook:
                reporthook(block_count, block_size, expected)

            while block := fp.read(block_size):
                received += len(block)
                tfp.write(block)
                block_count += 1
                if reporthook:
                    reporthook(block_count, block_size, expected)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000285
def urlcleanup():
    """Clean up temporary files from urlretrieve calls.

    Every path recorded in ``_url_tempfiles`` is unlinked on a best-effort
    basis (OSError is ignored), the record is emptied in place, and the
    module-wide default opener is dropped so the next urlopen() call
    builds a fresh one.
    """
    global _opener

    for leftover in _url_tempfiles:
        # The file may already be gone or be undeletable; keep going.
        with contextlib.suppress(OSError):
            os.unlink(leftover)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
298
# copied from cookielib.py
# Matches a trailing ":<digits>" port suffix.
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of the request's
    full URL; when that is empty the request's "Host" header is used
    instead (defaulting to the empty string).  Any trailing ":port" is
    stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url).netloc
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present, then normalise case
    return _cut_port_re.sub("", host, 1).lower()
316
class Request:
    """Encapsulate the state of a single URL request.

    Holds the URL (split into type, host, selector and fragment), the
    optional body data, two header dictionaries (regular headers and
    headers that must not be carried over to a redirected request), and
    bookkeeping used by proxy and cookie handling.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Build a request for *url*.

        headers is only iterated, never mutated.  When origin_req_host is
        None it is derived from the request itself via request_host().
        method, when truthy, overrides the default GET/POST choice made
        by get_method().
        """
        self.full_url = url
        # The header dicts must exist before data is assigned: the data
        # setter inspects them.
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        self.origin_req_host = (request_host(self)
                                if origin_req_host is None
                                else origin_req_host)
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL, with "#fragment" re-attached when a fragment exists."""
        if not self.fragment:
            return self._full_url
        return '{}#{}'.format(self._full_url, self.fragment)

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url, self.fragment = _splittag(unwrap(url))
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # A stored content-length was most probably computed for the
            # previous body, so drop it whenever the body changes.
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split the stored URL into type, host and selector."""
        scheme, rest = _splittype(self._full_url)
        self.type = scheme
        if scheme is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        host, self.selector = _splithost(rest)
        self.host = unquote(host) if host else host

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        fallback = "GET" if self.data is None else "POST"
        return getattr(self, 'method', fallback)

    def get_full_url(self):
        """Return the full URL (same value as the full_url property)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Route the request through the proxy at *host*.

        The first time an https request is proxied, its original host is
        remembered as the tunnel host and type/selector are left alone.
        Otherwise the request takes the proxy's type and uses the full
        URL as selector.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector is the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Store a header under key.capitalize()."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if header_name is a key of either header dict.

        The name is looked up exactly as given (no capitalization).
        """
        return any(header_name in table
                   for table in (self.headers, self.unredirected_hdrs))

    def get_header(self, header_name, default=None):
        """Return the header value, preferring the regular headers."""
        fallback = self.unredirected_hdrs.get(header_name, default)
        return self.headers.get(header_name, fallback)

    def remove_header(self, header_name):
        """Remove header_name from both header dicts, if present."""
        for table in (self.headers, self.unredirected_hdrs):
            table.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers win over unredirected ones of the same name.
        """
        merged = dict(self.unredirected_hdrs)
        merged.update(self.headers)
        return list(merged.items())
425
class OpenerDirector:
    """Dispatch requests to a collection of registered handler objects.

    Handlers are discovered by method name: ``<protocol>_open``,
    ``<protocol>_request``, ``<protocol>_response`` and
    ``<protocol>_error_<kind>`` methods are indexed by add_handler() and
    invoked, in handler order, by open() and error().
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        # Default headers; initially just the User-agent string.
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # protocol -> [handlers]
        self.handle_error = {}       # protocol -> {kind -> [handlers]}
        self.process_response = {}   # protocol -> [handlers]
        self.process_request = {}    # protocol -> [handlers]

    def add_handler(self, handler):
        """Register *handler* under every protocol/condition it implements.

        Raises TypeError if the object has no ``add_parent`` attribute.
        A handler that contributes at least one method is also inserted
        into self.handlers and told about its parent via add_parent().
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            protocol, underscore, condition = meth.partition("_")
            if not underscore:
                # A name without "_" cannot be a <protocol>_<condition>
                # method.  Slicing on find()'s -1 used to turn e.g. "open"
                # into protocol "ope" / condition "open" and register it.
                continue

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is whatever follows the
                # first "_" of the condition (the whole condition if none).
                kind = condition[condition.find("_") + 1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted using the handlers' own ordering.
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] in turn.

        Returns the first result that is not None, or None if every
        handler declined (or none is registered).
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request is run through the protocol's ``*_request``
        processors, opened via _open(), and the response is run through
        the ``*_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the default, protocol-specific, then unknown open chains.

        *data* is accepted but not used here.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https' the third positional argument is the
        status code: the ``http_error_<code>`` chain is tried first and
        then the ``http_error_default`` chain.  For any other protocol
        the ``<proto>_error`` method name is used.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = True
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = False
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
564
565# XXX probably also want an abstract factory that knows when it makes
566# sense to skip a superclass in favor of a subclass and when it might
567# make sense to include both
568
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.

    The opener also gets several default handlers, including support for
    HTTP, FTP and, when http.client provides HTTPSConnection, HTTPS.  A
    default handler class is left out whenever one of the arguments is an
    instance or a subclass of it.  Default handlers are added first,
    followed by the supplied handlers in the order given.
    """
    def replaces(candidate, default_class):
        # A class argument replaces a default it derives from; an instance
        # argument replaces a default it is an instance of.
        if isinstance(candidate, type):
            return issubclass(candidate, default_class)
        return isinstance(candidate, default_class)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    kept_defaults = [
        klass for klass in default_classes
        if not any(replaces(given, klass) for given in handlers)
    ]
    for klass in kept_defaults:
        opener.add_handler(klass())

    for given in handlers:
        opener.add_handler(given() if isinstance(given, type) else given)
    return opener
604
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Sort key used when handlers are ordered; lower values come first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler has been added to."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order.

        Anything lacking a handler_order attribute sorts after this
        handler.
        """
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
622
623
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses through; hand everything else to parent.error().

        Returns the original response for a 2xx status, otherwise
        whatever the parent's 'http' error dispatch returns.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response

        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
640
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turn the error response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the response.

        Always raises; never returns a response.
        """
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000644
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, guarding against loops."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Prefix of the HTTPError message raised when a loop is detected.
    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        Called by the http_error_30x methods when a redirection response
        arrives.  Return a new Request if the redirect should be followed
        so that http_error_30x can perform it.  Raise HTTPError if nobody
        else should try to handle this URL, or return None if this
        handler cannot but another one might.
        """
        method = req.get_method()
        may_follow = (
            code in (301, 302, 303, 307) and method in ("GET", "HEAD")
            or code in (301, 302, 303) and method == "POST")
        if not may_follow:
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request carries no body, so headers describing
        # the old body are left behind.
        body_headers = ("content-length", "content-type")
        carried = {}
        for name, value in req.headers.items():
            if name.lower() not in body_headers:
                carried[name] = value
        return Request(newurl,
                       headers=carried,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError for a disallowed
        target scheme or when a redirect loop is detected.  Otherwise
        returns the result of opening the new request through the parent.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for field in ("location", "uri"):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return

        # fix a possible malformed URL
        parsed = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.
        if parsed.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # A host with an empty path gets "/" as its path.
        if parsed.netloc and not parsed.path:
            newurl = urlunparse(parsed._replace(path="/"))

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
756
757
758def _parse_proxy(proxy):
759 """Return (scheme, user, password, host/port) given a URL or an authority.
760
761 If a URL is supplied, it must have an authority (host:port) component.
762 According to RFC 3986, having an authority component means the URL must
Senthil Kumarand8e24f12014-04-14 16:32:20 -0400763 have two slashes after the scheme.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000764 """
Cheryl Sabella0250de42018-04-25 16:51:54 -0700765 scheme, r_scheme = _splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000766 if not r_scheme.startswith("/"):
767 # authority
768 scheme = None
769 authority = proxy
770 else:
771 # URL
772 if not r_scheme.startswith("//"):
773 raise ValueError("proxy URL with no authority: %r" % proxy)
774 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
775 # and 3.3.), path is empty or starts with '/'
776 end = r_scheme.find("/", 2)
777 if end == -1:
778 end = None
779 authority = r_scheme[2:end]
Cheryl Sabella0250de42018-04-25 16:51:54 -0700780 userinfo, hostport = _splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000781 if userinfo is not None:
Cheryl Sabella0250de42018-04-25 16:51:54 -0700782 user, password = _splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000783 else:
784 user = password = None
785 return scheme, user, password, hostport
786
class ProxyHandler(BaseHandler):
    """Redirect requests to the proxy configured for their URL type.

    For every entry of the *proxies* mapping an ``<type>_open`` attribute
    is installed on the instance; it forwards the request to proxy_open()
    together with the proxy URL and the lower-cased type.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one opener per entry of *proxies*.

        When *proxies* is None the mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            scheme = scheme.lower()
            # Defaults bind the current values so each opener keeps its own
            # proxy URL and type.
            opener = (lambda r, proxy=proxy_url, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, scheme + '_open', opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; reopen it if the URL type changed.

        Returns None when the host is bypassed or when other handlers can
        deal with the modified request; otherwise returns the result of
        opening the request again through the parent opener.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            decoded = '%s:%s' % (unquote(user), unquote(password))
            creds = base64.b64encode(decoded.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)

        if orig_type != proxy_type and orig_type != 'https':
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
        # let other handlers take care of it
        return None
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000829
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI.

    ``self.passwd`` maps each realm to a dict whose keys are tuples of
    reduced URIs, i.e. (authority, path) pairs as produced by reduce_uri(),
    and whose values are (user, passwd) tuples.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* and *uri*.

        *uri* may be a single URI string or a sequence of URIs.  Each URI is
        stored twice: once with the scheme's default port made explicit and
        once exactly as given.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        realm_entries = self.passwd.setdefault(realm, {})
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            realm_entries[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *realm* and *authuri*.

        Returns (None, None) when no registered URI is equal to, or a parent
        of, *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the well-known port explicit so that "host" and
            # "host:80" reduce to the same authority.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.

        *test* is below *base* when the authorities are equal and the path
        of *test* lies inside the path of *base* on a path-segment boundary:
        "/foo/bar" is below "/foo", but "/foobar" is not.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare against the base path with a trailing slash: a plain
        # character-prefix test would wrongly treat "/foobar" as a child of
        # "/foo" and hand its credentials to an unrelated path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
892
893
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the credentials of realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first, then the default realm (None)."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # Nothing registered for this realm: try the default realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
902
903
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records an "authenticated" flag per URI.

    ``self.authenticated`` maps reduced URIs, as produced by reduce_uri(),
    to the flag last stored for them.
    """

    def __init__(self, *args, **kwargs):
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials and record *is_authenticated* for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        realms = (realm,) if realm is None else (None, realm)
        for target_realm in realms:
            super().add_password(target_realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Store *is_authenticated* for *uri* (a URI string or a sequence)."""
        # uri could be a single URI or a sequence
        uris = [uri] if isinstance(uri, str) else uri

        for default_port in (True, False):
            for single_uri in uris:
                key = self.reduce_uri(single_uri, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag of the first recorded URI that covers *authuri*.

        Falls off the end (returning None) when no recorded URI matches.
        """
        for default_port in (True, False):
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for known_uri, flag in self.authenticated.items():
                if self.is_suburi(known_uri, reduced_authuri):
                    return flag
933
934
class AbstractBasicAuthHandler:
    """Shared implementation of HTTP Basic authentication.

    Subclasses provide ``auth_header`` (the request header that carries the
    credentials) and a ``parent`` opener used to retry the request.
    """

    # A header may carry several challenges; the first Basic challenge that
    # specifies a realm is the one that gets answered.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    #
    # The scheme group excludes ',' so that it cannot run across challenge
    # separators; without that the pattern backtracks quadratically on
    # crafted headers.
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)'  # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr*, or a fresh HTTPPasswordMgr when it is None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        """Yield (scheme, realm) for every challenge found in *header*.

        A UserWarning is issued for each realm that is not quoted.  When no
        challenge with a realm is found, a single (scheme, None) pair is
        yielded, where scheme is the first word of the header ('' for an
        empty header).
        """
        # parse WWW-Authenticate header: accept multiple challenges per header
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote_char, realm = mo.groups()
            if quote_char not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the first Basic challenge found in the *authreq* headers.

        Returns the result of retry_http_basic_auth() for that challenge, or
        None when the header is absent or carries no Basic challenge with a
        realm.  Raises ValueError when no Basic challenge could be answered
        and a challenge with another scheme was seen.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            # Report the scheme that was actually rejected; the loop
            # variable may hold a later (possibly Basic) scheme by now.
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (unsupported,))

    def retry_http_basic_auth(self, host, req, realm):
        """Retry *req* with the credentials stored for *realm* and *host*.

        Returns None when no password is known, or when the request already
        carried exactly these credentials.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Send credentials up front for URLs flagged as authenticated.

        Only applies when the password manager has an ``is_authenticated``
        method that returns a true value for the request URL.
        """
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether the request URL answered with a 2xx status."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1043
1044
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001045
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials in 'Authorization'."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle the 'www-authenticate' challenge for the request URL."""
        return self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001055
1056
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials in 'Proxy-authorization'."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle the 'proxy-authenticate' challenge for the request host."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001070
1071
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08001072# Return n random bytes.
1073_randombytes = os.urandom
1074
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001075
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication.

    Subclasses provide ``auth_header`` (the request header that carries the
    credentials) and a ``parent`` opener used to retry the request.
    """
    # Digest authentication is specified in RFC 2617; the SHA-256 algorithm
    # name comes from RFC 7616.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer a Digest challenge found in the *auth_header* header.

        Returns the retried response for a Digest challenge and None for a
        missing header or a Basic challenge.  Raises HTTPError after more
        than five attempts and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Retry *req* with an authorization computed from challenge *auth*.

        Returns None when no authorization can be built or when the request
        already carried exactly that authorization.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hexadecimal client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value that follows 'Digest ' in the authorization header.

        *chal* is the parsed challenge (a mapping of its key=value pairs).
        Returns None when the challenge lacks realm or nonce, or when no
        user is registered for the realm and request URL.  Raises URLError
        when the challenge offers a qop that does not include 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per RFC 2617, when server sends "auth,auth-int", the
        # client could use either `auth` or `auth-int` to the response back.
        # we use `auth` to send the response back.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        # Strip each token: the list may be written as "auth-int, auth".
        elif 'auth' in [token.strip() for token in qop.split(',')]:
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        H hashes an ASCII string to a hexadecimal digest and KD(s, d) is
        H("s:d").  Supported names are 'MD5', 'SHA' and 'SHA-256'; any
        other name raises ValueError.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA-256':
            H = lambda x: hashlib.sha256(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Return the entity digest for *data*; currently always None."""
        # XXX not implemented yet
        return None
1220
1221
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 'www-authenticate' challenge, then reset the retry count."""
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        self.reset_retry_count()
        return response
1238
1239
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest credentials in 'Proxy-Authorization'."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 'proxy-authenticate' challenge, then reset the retry count."""
        response = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        self.reset_retry_count()
        return response
1251
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-like handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level passed to the connections made by do_open()."""
        self._debuglevel = level

    def _get_content_length(self, request):
        """Return the content length http.client computes for *request*."""
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Add the headers that are missing from *request* and return it.

        Requests with data get Content-type plus either Content-length or
        'Transfer-encoding: chunked'; every request gets Host and the
        parent opener's default headers.  Raises URLError when the request
        has no host and TypeError when the data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not (request.has_header('Content-length')
                    or request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is None:
                    # Length unknown: fall back to chunked transfer.
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')
                else:
                    request.add_unredirected_header(
                        'Content-length', str(content_length))

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is a full URL; the Host header
            # must name the host found in that URL.
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        conn = http_class(host, timeout=req.timeout, **http_conn_args)
        conn.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over the regular ones.
        headers = dict(req.unredirected_hdrs)
        for key, value in req.headers.items():
            headers.setdefault(key, value)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                # Proxy-Authorization should not be sent to origin
                # server.
                tunnel_headers[proxy_auth_hdr] = headers.pop(proxy_auth_hdr)
            conn.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                conn.request(
                    req.get_method(), req.selector, req.data, headers,
                    encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            response = conn.getresponse()
        except BaseException:
            # Close the connection on any failure, then re-raise.
            conn.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if conn.sock:
            conn.sock.close()
            conn.sock = None

        response.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        response.msg = response.reason
        return response
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001368
1369
class HTTPHandler(AbstractHTTPHandler):
    """Open http: requests over http.client.HTTPConnection."""

    def http_open(self, req):
        """Send *req* and return the response produced by do_open()."""
        return self.do_open(http.client.HTTPConnection, req)

    # Outgoing requests are prepared by the shared do_request_().
    http_request = AbstractHTTPHandler.do_request_
1376
# HTTPSHandler is only defined (and exported) when http.client provides
# HTTPSConnection.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: requests over http.client.HTTPSConnection."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # Both values are handed unchanged to the connection class.
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* and return the response produced by do_open()."""
            conn_args = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req, **conn_args)

        # Outgoing requests are prepared by the shared do_request_().
        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001393
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new http.cookiejar.CookieJar when it is None."""
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the cookie jar add its Cookie header to *request*."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the cookie jar extract the cookies carried by *response*."""
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1411
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open() reports the request's URL type."""

    def unknown_open(self, req):
        """Raise URLError naming the URL type of *req*."""
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001416
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An empty value
    ("key=") is stored as ''.  An element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slice instead of indexing so that an empty value does not raise
        # IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1426
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.

    Returns the list of elements with surrounding whitespace stripped.
    Inside a quoted-string a backslash is dropped and the character that
    follows it is kept as-is.
    """
    items = []
    buf = []           # characters of the element being collected
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character after a backslash: taken literally.
            buf.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                buf.append(ch)
        elif ch == ',':
            # Unquoted comma: the current element is complete.
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quotes = True
            buf.append(ch)

    # append last part
    if buf:
        items.append(''.join(buf))

    return [item.strip() for item in items]
1469
class FileHandler(BaseHandler):
    """Handler for file: URLs that refer to files on the local machine."""

    def file_open(self, req):
        """Open the local file named by *req*.

        Raises URLError when the selector starts with '//host' and the
        request's host is neither 'localhost' nor one of get_names().
        """
        url = req.selector
        looks_remote = (url[:2] == '//' and url[2:3] != '/' and
                        req.host and req.host != 'localhost')
        if looks_remote and req.host not in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")
        # Every accepted request is opened.  Previously a remote-looking
        # selector whose host *was* in get_names() fell through and
        # returned None instead of a response.
        return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of IP addresses treated as the local host.

        The tuple is computed once and cached on the FileHandler class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                # Fall back to whatever 'localhost' alone resolves to.
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file addressed by *req*.

        Content-type, Content-length and Last-modified headers are built
        from the file's stat data and guessed MIME type.  Raises URLError
        if the file cannot be accessed, or if the request names a port or
        a host that does not resolve to one of get_names().
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            # `port` is only bound when a host was given; the `not host`
            # test short-circuits before it is read otherwise.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001520
1521def _safe_gethostbyname(host):
1522 try:
1523 return socket.gethostbyname(host)
1524 except socket.gaierror:
1525 return None
1526
class FTPHandler(BaseHandler):
    """Handler that serves ftp: requests through an ftpwrapper."""

    def ftp_open(self, req):
        """Fetch the file or directory listing named by *req* over FTP.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they are known.  Raises URLError when no host
        is given, the host cannot be resolved, or ftplib reports an error.
        """
        import ftplib
        import mimetypes

        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)

        path, attrs = _splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, file = segments[:-1], segments[-1]
        # A leading '/' in the path produces an empty first segment.
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default: binary transfer for a file, listing for a directory.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            fp, retrlen = fw.retrfile(file, transfer_type)
            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string(''.join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            raise URLError('ftp error: %r' % exp).with_traceback(
                sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new, non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001584
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a small timed cache.

    Connections are keyed by (user, host, port, directory path, timeout).
    An entry expires `delay` seconds after its last use.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}        # key -> ftpwrapper
        self.timeout = {}      # key -> expiry time (time.time() based)
        self.soonest = 0       # earliest expiry among cached entries
        self.delay = 60        # seconds an entry lives after last use
        self.max_conns = 16    # size at which the oldest entry is evicted

    def setTimeout(self, t):
        """Set the number of seconds a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached ftpwrapper for this target, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        # Using an entry, new or old, renews its lease.
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # default=0 mirrors the initial value, so an empty cache (e.g.
        # check_cache() on a fresh handler) does not raise ValueError.
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection, as the expiry branch
                    # and clear_cache() do, instead of just dropping it.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1637
class DataHandler(BaseHandler):
    """Handler for data: URLs."""

    def data_open(self, req):
        """Decode the data: URL in *req* and return it as an addinfourl."""
        # data URLs as specified in RFC 2397.
        #
        # ignores POSTed data
        #
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        url = req.full_url

        _scheme, remainder = url.split(":", 1)
        mediatype, payload = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(payload)
        base64_marker = ";base64"
        if mediatype.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(base64_marker)]

        mediatype = mediatype or "text/plain;charset=US-ASCII"

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" %
            (mediatype, len(payload)))

        return addinfourl(io.BytesIO(payload), headers, url)
1667
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001668
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)


# Module-level FTP connection cache; URLopener.__init__ stores a reference
# to this dict on every instance as .ftpcache.
ftpcache = {}
Senthil Kumarana2a9ddd2017-04-08 23:27:25 -07001689
1690
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001691class URLopener:
1692 """Class to open URLs.
1693 This is a class rather than just a subroutine because we may need
1694 more than one set of global protocol-specific options.
1695 Note -- this is a base class for those who don't want the
1696 automatic handling of errors type 302 (relocated) and 401
1697 (authorization needed)."""
1698
1699 __tempfiles = None
1700
1701 version = "Python-urllib/%s" % __version__
1702
1703 # Constructor
1704 def __init__(self, proxies=None, **x509):
Georg Brandlfcbdbf22012-06-24 19:56:31 +02001705 msg = "%(class)s style of invoking requests is deprecated. " \
Senthil Kumaran38b968b92012-03-14 13:43:53 -07001706 "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
1707 warnings.warn(msg, DeprecationWarning, stacklevel=3)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001708 if proxies is None:
1709 proxies = getproxies()
1710 assert hasattr(proxies, 'keys'), "proxies must be a mapping"
1711 self.proxies = proxies
1712 self.key_file = x509.get('key_file')
1713 self.cert_file = x509.get('cert_file')
Raymond Hettingerb7f3c942016-09-09 16:44:53 -07001714 self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001715 self.__tempfiles = []
1716 self.__unlink = os.unlink # See cleanup()
1717 self.tempcache = None
1718 # Undocumented feature: if you assign {} to tempcache,
1719 # it is used to cache files retrieved with
1720 # self.retrieve(). This is not enabled by default
1721 # since it does not work for changing documents (and I
1722 # haven't got the logic to check expiration headers
1723 # yet).
1724 self.ftpcache = ftpcache
1725 # Undocumented feature: you can use a different
1726 # ftp cache by assigning to the .ftpcache member;
1727 # in case you want logically independent URL openers
1728 # XXX This is not threadsafe. Bah.
1729
    def __del__(self):
        """Run close() when the instance is finalized."""
        self.close()
1732
    def close(self):
        """Release the opener's resources; delegates to cleanup()."""
        self.cleanup()
1735
1736 def cleanup(self):
1737 # This code sometimes runs when the rest of this module
1738 # has already been deleted, so it can't use any globals
1739 # or import anything.
1740 if self.__tempfiles:
1741 for file in self.__tempfiles:
1742 try:
1743 self.__unlink(file)
1744 except OSError:
1745 pass
1746 del self.__tempfiles[:]
1747 if self.tempcache:
1748 self.tempcache.clear()
1749
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # args is stored as a tuple; _open_generic_http unpacks each entry
        # as (header, value), so exactly two arguments are expected.
        self.addheaders.append(args)
1754
1755 # External interface
1756 def open(self, fullurl, data=None):
1757 """Use URLopener().open(file) instead of open(file, 'r')."""
Rémi Lapeyre674ee122019-05-27 15:43:45 +02001758 fullurl = unwrap(_to_bytes(fullurl))
Senthil Kumaran734f0592010-02-20 22:19:04 +00001759 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001760 if self.tempcache and fullurl in self.tempcache:
1761 filename, headers = self.tempcache[fullurl]
1762 fp = open(filename, 'rb')
Georg Brandl13e89462008-07-01 19:56:00 +00001763 return addinfourl(fp, headers, fullurl)
Cheryl Sabella0250de42018-04-25 16:51:54 -07001764 urltype, url = _splittype(fullurl)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001765 if not urltype:
1766 urltype = 'file'
1767 if urltype in self.proxies:
1768 proxy = self.proxies[urltype]
Cheryl Sabella0250de42018-04-25 16:51:54 -07001769 urltype, proxyhost = _splittype(proxy)
1770 host, selector = _splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001771 url = (host, fullurl) # Signal special case to open_*()
1772 else:
1773 proxy = None
1774 name = 'open_' + urltype
1775 self.type = urltype
1776 name = name.replace('-', '_')
Victor Stinner0c2b6a32019-05-22 22:15:01 +02001777 if not hasattr(self, name) or name == 'open_local_file':
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001778 if proxy:
1779 return self.open_unknown_proxy(proxy, fullurl, data)
1780 else:
1781 return self.open_unknown(fullurl, data)
1782 try:
1783 if data is None:
1784 return getattr(self, name)(url)
1785 else:
1786 return getattr(self, name)(url, data)
Senthil Kumaranf5776862012-10-21 13:30:02 -07001787 except (HTTPError, URLError):
Antoine Pitrou6b4883d2011-10-12 02:54:14 +02001788 raise
Andrew Svetlov0832af62012-12-18 23:10:48 +02001789 except OSError as msg:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001790 raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001791
1792 def open_unknown(self, fullurl, data=None):
1793 """Overridable interface to open unknown URL type."""
Cheryl Sabella0250de42018-04-25 16:51:54 -07001794 type, url = _splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001795 raise OSError('url error', 'unknown url type', type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001796
1797 def open_unknown_proxy(self, proxy, fullurl, data=None):
1798 """Overridable interface to open unknown URL type."""
Cheryl Sabella0250de42018-04-25 16:51:54 -07001799 type, url = _splittype(fullurl)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001800 raise OSError('url error', 'invalid proxy for %s' % type, proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001801
1802 # External interface
1803 def retrieve(self, url, filename=None, reporthook=None, data=None):
1804 """retrieve(url) returns (filename, headers) for a local object
1805 or (tempfilename, headers) for a remote object."""
Rémi Lapeyre674ee122019-05-27 15:43:45 +02001806 url = unwrap(_to_bytes(url))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001807 if self.tempcache and url in self.tempcache:
1808 return self.tempcache[url]
Cheryl Sabella0250de42018-04-25 16:51:54 -07001809 type, url1 = _splittype(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001810 if filename is None and (not type or type == 'file'):
1811 try:
1812 fp = self.open_local_file(url1)
1813 hdrs = fp.info()
Philip Jenveycb134d72009-12-03 02:45:01 +00001814 fp.close()
Xtreakc661b302019-05-19 19:10:06 +05301815 return url2pathname(_splithost(url1)[1]), hdrs
Pablo Galindo293dd232019-11-19 21:34:03 +00001816 except OSError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001817 pass
1818 fp = self.open(url, data)
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001819 try:
1820 headers = fp.info()
1821 if filename:
1822 tfp = open(filename, 'wb')
1823 else:
Xtreakc661b302019-05-19 19:10:06 +05301824 garbage, path = _splittype(url)
1825 garbage, path = _splithost(path or "")
1826 path, garbage = _splitquery(path or "")
1827 path, garbage = _splitattr(path or "")
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001828 suffix = os.path.splitext(path)[1]
1829 (fd, filename) = tempfile.mkstemp(suffix)
1830 self.__tempfiles.append(filename)
1831 tfp = os.fdopen(fd, 'wb')
1832 try:
1833 result = filename, headers
1834 if self.tempcache is not None:
1835 self.tempcache[url] = result
1836 bs = 1024*8
1837 size = -1
1838 read = 0
1839 blocknum = 0
Senthil Kumarance260142011-11-01 01:35:17 +08001840 if "content-length" in headers:
1841 size = int(headers["Content-Length"])
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001842 if reporthook:
Benjamin Peterson5f28b7b2009-03-26 21:49:58 +00001843 reporthook(blocknum, bs, size)
1844 while 1:
1845 block = fp.read(bs)
1846 if not block:
1847 break
1848 read += len(block)
1849 tfp.write(block)
1850 blocknum += 1
1851 if reporthook:
1852 reporthook(blocknum, bs, size)
1853 finally:
1854 tfp.close()
1855 finally:
1856 fp.close()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001857
1858 # raise exception if actual size does not match content-length header
1859 if size >= 0 and read < size:
Georg Brandl13e89462008-07-01 19:56:00 +00001860 raise ContentTooShortError(
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001861 "retrieval incomplete: got only %i out of %i bytes"
1862 % (read, size), result)
1863
1864 return result
1865
1866 # Each method named open_<type> knows how to open that type of URL
1867
    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve (a string), or a (proxy host, full URL)
          pair as built by open() when a proxy is configured.
        - data is payload for a POST request or None.

        Returns an addinfourl for 2xx responses; any other status is
        passed to http_error().  Raises OSError if no host can be
        determined and URLError on a bad status line.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: url is "//host/selector".
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: connect to the proxy and send the full URL
            # as the selector.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Destination is exempt from proxying: connect to it
                    # directly.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Applied last, so entries in self.addheaders override any header
        # of the same name set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
1959
    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        # Plain http.client.HTTPConnection is the connection factory.
        return self._open_generic_http(http.client.HTTPConnection, url, data)
1963
1964 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1965 """Handle http errors.
1966
1967 Derived class can override this, or provide specific handlers
1968 named http_error_DDD where DDD is the 3-digit error code."""
1969 # First check if there's a specific handler for this error
1970 name = 'http_error_%d' % errcode
1971 if hasattr(self, name):
1972 method = getattr(self, name)
1973 if data is None:
1974 result = method(url, fp, errcode, errmsg, headers)
1975 else:
1976 result = method(url, fp, errcode, errmsg, headers, data)
1977 if result: return result
1978 return self.http_error_default(url, fp, errcode, errmsg, headers)
1979
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        # fp is already closed, so the exception is built without it (None).
        raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001984
    # HTTPS support is only defined when _have_ssl is true.
    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using this opener's
            key_file and cert_file."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                          cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)
1994
1995 def open_file(self, url):
1996 """Use local file or FTP depending on form of URL."""
1997 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07001998 raise URLError('file error: proxy support for file protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001999 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
Senthil Kumaran383c32d2010-10-14 11:57:35 +00002000 raise ValueError("file:// scheme is supported only on localhost")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002001 else:
2002 return self.open_local_file(url)
2003
2004 def open_local_file(self, url):
2005 """Use local file."""
Senthil Kumaran6c5bd402011-11-01 23:20:31 +08002006 import email.utils
2007 import mimetypes
Cheryl Sabella0250de42018-04-25 16:51:54 -07002008 host, file = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002009 localname = url2pathname(file)
2010 try:
2011 stats = os.stat(localname)
2012 except OSError as e:
Senthil Kumaranf5776862012-10-21 13:30:02 -07002013 raise URLError(e.strerror, e.filename)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002014 size = stats.st_size
2015 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
2016 mtype = mimetypes.guess_type(url)[0]
2017 headers = email.message_from_string(
2018 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
2019 (mtype or 'text/plain', size, modified))
2020 if not host:
2021 urlfile = file
2022 if file[:1] == '/':
2023 urlfile = 'file://' + file
Georg Brandl13e89462008-07-01 19:56:00 +00002024 return addinfourl(open(localname, 'rb'), headers, urlfile)
Cheryl Sabella0250de42018-04-25 16:51:54 -07002025 host, port = _splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002026 if (not port
Senthil Kumaran40d80782012-10-22 09:43:04 -07002027 and socket.gethostbyname(host) in ((localhost(),) + thishost())):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002028 urlfile = file
2029 if file[:1] == '/':
2030 urlfile = 'file://' + file
Senthil Kumaran3800ea92012-01-21 11:52:48 +08002031 elif file[:2] == './':
2032 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
Georg Brandl13e89462008-07-01 19:56:00 +00002033 return addinfourl(open(localname, 'rb'), headers, urlfile)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002034 raise URLError('local file error: not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002035
2036 def open_ftp(self, url):
2037 """Use FTP protocol."""
2038 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002039 raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002040 import mimetypes
Cheryl Sabella0250de42018-04-25 16:51:54 -07002041 host, path = _splithost(url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002042 if not host: raise URLError('ftp error: no host given')
Cheryl Sabella0250de42018-04-25 16:51:54 -07002043 host, port = _splitport(host)
2044 user, host = _splituser(host)
2045 if user: user, passwd = _splitpasswd(user)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002046 else: passwd = None
Georg Brandl13e89462008-07-01 19:56:00 +00002047 host = unquote(host)
2048 user = unquote(user or '')
2049 passwd = unquote(passwd or '')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002050 host = socket.gethostbyname(host)
2051 if not port:
2052 import ftplib
2053 port = ftplib.FTP_PORT
2054 else:
2055 port = int(port)
Cheryl Sabella0250de42018-04-25 16:51:54 -07002056 path, attrs = _splitattr(path)
Georg Brandl13e89462008-07-01 19:56:00 +00002057 path = unquote(path)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002058 dirs = path.split('/')
2059 dirs, file = dirs[:-1], dirs[-1]
2060 if dirs and not dirs[0]: dirs = dirs[1:]
2061 if dirs and not dirs[0]: dirs[0] = '/'
2062 key = user, host, port, '/'.join(dirs)
2063 # XXX thread unsafe!
2064 if len(self.ftpcache) > MAXFTPCACHE:
2065 # Prune the cache, rather arbitrarily
Benjamin Peterson3c2dca62014-06-07 15:08:04 -07002066 for k in list(self.ftpcache):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002067 if k != key:
2068 v = self.ftpcache[k]
2069 del self.ftpcache[k]
2070 v.close()
2071 try:
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002072 if key not in self.ftpcache:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002073 self.ftpcache[key] = \
2074 ftpwrapper(user, passwd, host, port, dirs)
2075 if not file: type = 'D'
2076 else: type = 'I'
2077 for attr in attrs:
Cheryl Sabella0250de42018-04-25 16:51:54 -07002078 attr, value = _splitvalue(attr)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002079 if attr.lower() == 'type' and \
2080 value in ('a', 'A', 'i', 'I', 'd', 'D'):
2081 type = value.upper()
2082 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
2083 mtype = mimetypes.guess_type("ftp:" + url)[0]
2084 headers = ""
2085 if mtype:
2086 headers += "Content-Type: %s\n" % mtype
2087 if retrlen is not None and retrlen >= 0:
2088 headers += "Content-Length: %d\n" % retrlen
2089 headers = email.message_from_string(headers)
Georg Brandl13e89462008-07-01 19:56:00 +00002090 return addinfourl(fp, headers, "ftp:" + url)
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002091 except ftperrors() as exp:
2092 raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002093
2094 def open_data(self, url, data=None):
2095 """Use "data" URL."""
2096 if not isinstance(url, str):
Senthil Kumaran3ebef362012-10-21 18:31:25 -07002097 raise URLError('data error: proxy support for data protocol currently not implemented')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002098 # ignore POSTed data
2099 #
2100 # syntax of data URLs:
2101 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2102 # mediatype := [ type "/" subtype ] *( ";" parameter )
2103 # data := *urlchar
2104 # parameter := attribute "=" value
2105 try:
2106 [type, data] = url.split(',', 1)
2107 except ValueError:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002108 raise OSError('data error', 'bad data URL')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002109 if not type:
2110 type = 'text/plain;charset=US-ASCII'
2111 semi = type.rfind(';')
2112 if semi >= 0 and '=' not in type[semi:]:
2113 encoding = type[semi+1:]
2114 type = type[:semi]
2115 else:
2116 encoding = ''
2117 msg = []
Senthil Kumaranf6c456d2010-05-01 08:29:18 +00002118 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002119 time.gmtime(time.time())))
2120 msg.append('Content-type: %s' % type)
2121 if encoding == 'base64':
Georg Brandl706824f2009-06-04 09:42:55 +00002122 # XXX is this encoding/decoding ok?
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00002123 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002124 else:
Georg Brandl13e89462008-07-01 19:56:00 +00002125 data = unquote(data)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002126 msg.append('Content-Length: %d' % len(data))
2127 msg.append('')
2128 msg.append(data)
2129 msg = '\n'.join(msg)
Georg Brandl13e89462008-07-01 19:56:00 +00002130 headers = email.message_from_string(msg)
2131 f = io.StringIO(msg)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002132 #f.fileno = None # needed for addinfourl
Georg Brandl13e89462008-07-01 19:56:00 +00002133 return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002134
2135
2136class FancyURLopener(URLopener):
2137 """Derived class with handlers for errors we can handle (perhaps)."""
2138
    def __init__(self, *args, **kwargs):
        """Initialise the base opener and the retry bookkeeping."""
        URLopener.__init__(self, *args, **kwargs)
        # NOTE(review): auth_cache is not read in the code visible here;
        # presumably it caches credentials for the 401/407 handlers.
        self.auth_cache = {}
        # tries / maxtries bound redirect recursion in http_error_302().
        self.tries = 0
        self.maxtries = 10
2143 self.maxtries = 10
2144
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # The error response is returned like a normal one, carrying errcode.
        return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002148
2149 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2150 """Error 302 -- relocated (temporarily)."""
2151 self.tries += 1
Martin Pantera0370222016-02-04 06:01:35 +00002152 try:
2153 if self.maxtries and self.tries >= self.maxtries:
2154 if hasattr(self, "http_error_500"):
2155 meth = self.http_error_500
2156 else:
2157 meth = self.http_error_default
2158 return meth(url, fp, 500,
2159 "Internal Server Error: Redirect Recursion",
2160 headers)
2161 result = self.redirect_internal(url, fp, errcode, errmsg,
2162 headers, data)
2163 return result
2164 finally:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002165 self.tries = 0
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002166
2167 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2168 if 'location' in headers:
2169 newurl = headers['location']
2170 elif 'uri' in headers:
2171 newurl = headers['uri']
2172 else:
2173 return
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002174 fp.close()
guido@google.coma119df92011-03-29 11:41:02 -07002175
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002176 # In case the server sent a relative URL, join with original:
Georg Brandl13e89462008-07-01 19:56:00 +00002177 newurl = urljoin(self.type + ":" + url, newurl)
guido@google.coma119df92011-03-29 11:41:02 -07002178
2179 urlparts = urlparse(newurl)
2180
2181 # For security reasons, we don't allow redirection to anything other
2182 # than http, https and ftp.
2183
2184 # We are using newer HTTPError with older redirect_internal method
2185 # This older method will get deprecated in 3.3
2186
Senthil Kumaran6497aa32012-01-04 13:46:59 +08002187 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
guido@google.coma119df92011-03-29 11:41:02 -07002188 raise HTTPError(newurl, errcode,
2189 errmsg +
2190 " Redirection to url '%s' is not allowed." % newurl,
2191 headers, fp)
2192
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002193 return self.open(newurl)
2194
2195 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
2196 """Error 301 -- also relocated (permanently)."""
2197 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2198
2199 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
2200 """Error 303 -- also relocated (essentially identical to 302)."""
2201 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2202
2203 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2204 """Error 307 -- relocated, but turn POST into error."""
2205 if data is None:
2206 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2207 else:
2208 return self.http_error_default(url, fp, errcode, errmsg, headers)
2209
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002210 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2211 retry=False):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002212 """Error 401 -- authentication required.
2213 This function supports Basic authentication only."""
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002214 if 'www-authenticate' not in headers:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002215 URLopener.http_error_default(self, url, fp,
2216 errcode, errmsg, headers)
2217 stuff = headers['www-authenticate']
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002218 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2219 if not match:
2220 URLopener.http_error_default(self, url, fp,
2221 errcode, errmsg, headers)
2222 scheme, realm = match.groups()
2223 if scheme.lower() != 'basic':
2224 URLopener.http_error_default(self, url, fp,
2225 errcode, errmsg, headers)
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002226 if not retry:
2227 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2228 headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002229 name = 'retry_' + self.type + '_basic_auth'
2230 if data is None:
2231 return getattr(self,name)(url, realm)
2232 else:
2233 return getattr(self,name)(url, realm, data)
2234
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002235 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
2236 retry=False):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002237 """Error 407 -- proxy authentication required.
2238 This function supports Basic authentication only."""
Senthil Kumaran34d38dc2011-10-20 02:48:01 +08002239 if 'proxy-authenticate' not in headers:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002240 URLopener.http_error_default(self, url, fp,
2241 errcode, errmsg, headers)
2242 stuff = headers['proxy-authenticate']
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002243 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2244 if not match:
2245 URLopener.http_error_default(self, url, fp,
2246 errcode, errmsg, headers)
2247 scheme, realm = match.groups()
2248 if scheme.lower() != 'basic':
2249 URLopener.http_error_default(self, url, fp,
2250 errcode, errmsg, headers)
Senthil Kumaran80f1b052010-06-18 15:08:18 +00002251 if not retry:
2252 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2253 headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002254 name = 'retry_proxy_' + self.type + '_basic_auth'
2255 if data is None:
2256 return getattr(self,name)(url, realm)
2257 else:
2258 return getattr(self,name)(url, realm, data)
2259
2260 def retry_proxy_http_basic_auth(self, url, realm, data=None):
Cheryl Sabella0250de42018-04-25 16:51:54 -07002261 host, selector = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002262 newurl = 'http://' + host + selector
2263 proxy = self.proxies['http']
Cheryl Sabella0250de42018-04-25 16:51:54 -07002264 urltype, proxyhost = _splittype(proxy)
2265 proxyhost, proxyselector = _splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002266 i = proxyhost.find('@') + 1
2267 proxyhost = proxyhost[i:]
2268 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2269 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002270 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002271 quote(passwd, safe=''), proxyhost)
2272 self.proxies['http'] = 'http://' + proxyhost + proxyselector
2273 if data is None:
2274 return self.open(newurl)
2275 else:
2276 return self.open(newurl, data)
2277
2278 def retry_proxy_https_basic_auth(self, url, realm, data=None):
Cheryl Sabella0250de42018-04-25 16:51:54 -07002279 host, selector = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002280 newurl = 'https://' + host + selector
2281 proxy = self.proxies['https']
Cheryl Sabella0250de42018-04-25 16:51:54 -07002282 urltype, proxyhost = _splittype(proxy)
2283 proxyhost, proxyselector = _splithost(proxyhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002284 i = proxyhost.find('@') + 1
2285 proxyhost = proxyhost[i:]
2286 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2287 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002288 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002289 quote(passwd, safe=''), proxyhost)
2290 self.proxies['https'] = 'https://' + proxyhost + proxyselector
2291 if data is None:
2292 return self.open(newurl)
2293 else:
2294 return self.open(newurl, data)
2295
2296 def retry_http_basic_auth(self, url, realm, data=None):
Cheryl Sabella0250de42018-04-25 16:51:54 -07002297 host, selector = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002298 i = host.find('@') + 1
2299 host = host[i:]
2300 user, passwd = self.get_user_passwd(host, realm, i)
2301 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002302 host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002303 quote(passwd, safe=''), host)
2304 newurl = 'http://' + host + selector
2305 if data is None:
2306 return self.open(newurl)
2307 else:
2308 return self.open(newurl, data)
2309
2310 def retry_https_basic_auth(self, url, realm, data=None):
Cheryl Sabella0250de42018-04-25 16:51:54 -07002311 host, selector = _splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002312 i = host.find('@') + 1
2313 host = host[i:]
2314 user, passwd = self.get_user_passwd(host, realm, i)
2315 if not (user or passwd): return None
Georg Brandl13e89462008-07-01 19:56:00 +00002316 host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002317 quote(passwd, safe=''), host)
2318 newurl = 'https://' + host + selector
2319 if data is None:
2320 return self.open(newurl)
2321 else:
2322 return self.open(newurl, data)
2323
Florent Xicluna757445b2010-05-17 17:24:07 +00002324 def get_user_passwd(self, host, realm, clear_cache=0):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002325 key = realm + '@' + host.lower()
2326 if key in self.auth_cache:
2327 if clear_cache:
2328 del self.auth_cache[key]
2329 else:
2330 return self.auth_cache[key]
2331 user, passwd = self.prompt_user_passwd(host, realm)
2332 if user or passwd: self.auth_cache[key] = (user, passwd)
2333 return user, passwd
2334
2335 def prompt_user_passwd(self, host, realm):
2336 """Override this in a GUI environment!"""
2337 import getpass
2338 try:
2339 user = input("Enter username for %s at %s: " % (realm, host))
2340 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
2341 (user, realm, host))
2342 return user, passwd
2343 except KeyboardInterrupt:
2344 print()
2345 return None, None
2346
2347
2348# Utility functions
2349
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once; later calls return the cached value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2357
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    The result is a tuple, computed once and cached.  When the machine's own
    host name cannot be resolved, the addresses of 'localhost' are used.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        own_name = socket.gethostname()
        addresses = socket.gethostbyname_ex(own_name)[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2368
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is cached.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2377
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same object is handed out on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2385
2386
2387# Utility classes
2388
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    The wrapper counts the file objects handed out by retrfile() and only
    closes the FTP connection once close() has been requested and every one
    of those file objects has been closed.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        """Store the connection parameters, then connect and log in.

        dirs is a sequence of directory names; init() joins it with '/' and
        changes into the result.  Any exception raised while connecting
        triggers close() and is then re-raised.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects from retrfile() that are still open.
        self.refcount = 0
        # While true, closing the last file object leaves the connection up.
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Release the connection on any failure, then propagate the
            # original exception unchanged.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change into the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving file and return (file object, length).

        type is appended to the FTP TYPE command; 'd' or 'D' requests a
        directory listing.  When file is empty, or retrieving it fails with
        a reply starting with '550', a directory listing is produced
        instead.  The length is whatever ftplib's ntransfercmd() reports.

        Raises URLError for other permanent errors on RETR and when
        changing into file for the listing fails.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: reconnect once and retry it.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A '550' reply is not fatal here: fall through to the
                # directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy."""
        self.busy = 0

    def close(self):
        """Request shutdown; close now if no file object is outstanding."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP-level errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: a failure while closing is deliberately ignored.
            pass
2482
2483# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    cut = len(suffix)
    found = {}
    # Lowercase variables take precedence, so the environment is scanned
    # twice.  First pass: accept the suffix in any letter case.
    for variable, setting in os.environ.items():
        lowered = variable.lower()
        if setting and lowered.endswith(suffix):
            found[lowered[:-cut]] = setting
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        found.pop('http', None)
    # Second pass: only variables whose suffix is spelled in lowercase.  A
    # non-empty value overrides the first pass, an empty one removes the
    # scheme altogether.
    for variable, setting in os.environ.items():
        if not variable.endswith(suffix):
            continue
        scheme = variable.lower()[:-cut]
        if setting:
            found[scheme] = setting
        else:
            found.pop(scheme, None)
    return found
2514
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    # don't bypass, if no_proxy isn't specified
    try:
        no_proxy = proxies['no']
    except KeyError:
        return False
    # '*' is special case for always bypass
    if no_proxy == '*':
        return True
    lowered = host.lower()
    # strip port off host
    bare_host, port = _splitport(lowered)
    candidates = (bare_host, lowered)
    # check if the host ends with any of the DNS suffixes
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        # ignore leading dots, compare case-insensitively
        entry = entry.lstrip('.').lower()
        if entry in candidates:
            return True
        dotted = '.' + entry
        if bare_host.endswith(dotted) or lowered.endswith(dotted):
            return True
    # otherwise, don't bypass
    return False
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002548
2549
Ronald Oussorene72e1612011-03-14 18:15:25 -04002550# This code tests an OSX specific data structure but is testable on all
2551# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = _splitport(host)

    def ip2num(ipAddr):
        # Pack up to four dotted decimal fields into one integer; missing
        # trailing fields count as 0 and extra fields are dropped.
        octets = [int(piece) for piece in ipAddr.split('.')]
        if len(octets) != 4:
            octets = (octets + [0, 0, 0, 0])[:4]
        first, second, third, fourth = octets
        return (first << 24) | (second << 16) | (third << 8) | fourth

    # Check for simple host names:
    if '.' not in host and proxy_settings['exclude_simple']:
        return True

    # Resolved lazily, the first time a numeric exception is encountered.
    hostIP = None

    for pattern in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not pattern:
            continue

        numeric = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", pattern)
        if numeric is None:
            # Not an address: treat the entry as a shell-style glob.
            if fnmatch(host, pattern):
                return True
            continue

        if hostIP is None:
            try:
                resolved = socket.gethostbyname(hostonly)
                hostIP = ip2num(resolved)
            except OSError:
                continue

        address, prefix = numeric.group(1), numeric.group(2)
        base = ip2num(address)
        if prefix is None:
            # No explicit prefix length: 8 bits per field that was given.
            bits = 8 * (address.count('.') + 1)
        else:
            bits = int(prefix[1:])

        if not 0 <= bits <= 32:
            # System libraries ignore invalid prefix lengths
            continue

        shift = 32 - bits
        if (hostIP >> shift) == (base >> shift):
            return True

    return False
2615
2616
# Select the platform-specific implementations of getproxies() and
# proxy_bypass().  On every platform, proxy settings found in the environment
# take precedence over the system configuration.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True if the system proxy settings exclude host."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            # The with block closes the registry key even when one of the
            # queries or the parsing below raises.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion'
                    r'\Internet Settings') as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to
                    # ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('(?:[^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['https'] = 'https://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # Whatever was collected before the failure is returned as is.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry's ProxyOverride list matches host, else 0.

        host may carry a port, which is ignored.  Besides the name itself,
        its resolved address and fully qualified name are also compared
        against each ';'-separated override entry.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            # The with block guarantees the registry key is closed again.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion'
                    r'\Internet Settings') as internetSettings:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # NOTE(review): re.match only anchors at the start, so an
                # entry also matches any longer name that begins with it --
                # confirm this is intended.
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment