"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.

objects of interest:

OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
the Handler classes, while dealing with requests and responses.

Request -- An object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('https://www.python.org/')
"""
69
# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
83
84import base64
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +000085import bisect
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092import re
93import socket
Martin Pantere6f06092016-05-16 01:14:20 +000094import string
Jeremy Hylton1afc1692008-06-18 20:49:58 +000095import sys
96import time
Senthil Kumarane24f96a2012-03-13 19:29:33 -070097import tempfile
98import contextlib
Senthil Kumaran38b968b92012-03-14 13:43:53 -070099import warnings
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700100
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101
Georg Brandl13e89462008-07-01 19:56:00 +0000102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
Rémi Lapeyre674ee122019-05-27 15:43:45 +0200104 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
Cheryl Sabella0250de42018-04-25 16:51:54 -0700105 _splittype, _splithost, _splitport, _splituser, _splitpasswd,
106 _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
Antoine Pitroudf204be2012-11-24 17:59:08 +0100107 unquote_to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000108from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000109
# check for SSL
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default opener: created lazily by urlopen() and replaced by
# install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used). This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options. See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests. cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files. More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored when building the SSL context, but
    a true value still triggers the deprecation warning below.

    Passing any of *cafile*, *capath* or *cadefault* emits a
    DeprecationWarning.  A ValueError is raised if one of them is combined
    with *context*, or if one of them is given and SSL support is not
    available.

    This function always returns an object which can work as a
    context manager and has the properties url, headers, and status.
    See urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified. In addition to the three properties above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        import warnings
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])
        # A one-off opener is built; the module-wide default is left untouched.
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: create the default opener and cache it.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000217
def install_opener(opener):
    """Install *opener* as the module-wide default opener (``_opener``)."""
    global _opener
    _opener = opener
221
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.

    Raises ContentTooShortError if a Content-Length header was present
    and fewer bytes than announced were read.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            # Remember the file so that urlcleanup() can delete it later.
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1       # -1 means "total size unknown"
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Report once before any data has been transferred.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000285
def urlcleanup():
    """Clean up temporary files from urlretrieve calls.

    Also resets the module-wide default opener so that the next plain
    urlopen() call builds a fresh one.
    """
    global _opener
    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except OSError:
            # Best effort: the file may already be gone.
            pass

    del _url_tempfiles[:]
    if _opener:
        _opener = None
297 _opener = None
298
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of request.full_url;
    if that is empty, the request's "Host" header is used instead.  Any
    trailing ":port" is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.full_url
    host = urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
316
class Request:
    """Encapsulate the state of a single URL request.

    Holds the URL (split into type, host, selector and fragment), the
    optional request data, and two header mappings: ``headers`` and
    ``unredirected_hdrs``.  Header names are normalised with
    str.capitalize() when they are added.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the shared default ``headers`` dict is only iterated here,
        # never mutated.
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        # _data must exist before the ``data`` property setter runs.
        self._data = None
        self.data = data
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        # ``method`` is only stored when given, so get_method() can fall
        # back to a class-level ``method`` attribute or to its default.
        if method:
            self.method = method

    @property
    def full_url(self):
        """The URL of the request, including the fragment if there is one."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The data to be sent with the request, or None."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split the stored URL into ``type``, ``host`` and ``selector``.

        Raises ValueError if the URL has no scheme.
        """
        self.type, rest = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Return the full URL (same value as the ``full_url`` property)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Redirect the request to the proxy *host*.

        For an https request without a tunnel host yet, the original host
        is remembered in ``_tunnel_host``; otherwise the request type is
        replaced by *type* and the selector becomes the full URL.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if the selector equals the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is in either header mapping.

        The lookup is exact; no case normalisation is applied here.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring ``headers`` over
        ``unredirected_hdrs``, or *default* if it is in neither."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header mappings, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return a list of (name, value) pairs from both header mappings.

        On a name clash the value from ``headers`` wins.
        """
        hdrs = {**self.unredirected_hdrs, **self.headers}
        return list(hdrs.items())
425
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are registered with add_handler() and indexed by the names of
    their methods (``<protocol>_open``, ``<protocol>_request``,
    ``<protocol>_response``, ``<protocol>_error_<kind>``).
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every dispatch table its methods match.

        Raises TypeError if *handler* has no ``add_parent`` attribute.
        The handler is only recorded (and given this director as parent)
        if at least one of its method names matched.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            # Split "<protocol>_<condition>" at the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is what follows the
                # second underscore, as an int when it is numeric.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each handler list sorted (handlers define ordering).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request is passed through the ``<protocol>_request``
        processors, opened via _open(), and the response is passed through
        the ``<protocol>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open handlers
        in that order and return the first truthy result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error handlers.

        For 'http' and 'https' the third positional argument is the status
        code used to pick ``http_error_<code>``; if no handler returns a
        truthy result, ``http_error_default`` handlers are tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            table = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (table, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (table, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
564
# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    *handlers* may contain handler instances or handler classes; classes
    are instantiated without arguments.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Collect the defaults that are overridden by a supplied handler
    # (either a subclass or an instance of the default class).
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, type):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isinstance(h, type):
            h = h()
        opener.add_handler(h)
    return opener
604
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector."""

    # Sort key used by __lt__; lower values sort first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order
622
623
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* unchanged for 2xx codes; otherwise return the
        result of ``self.parent.error('http', ...)``."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
640
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler that always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000644
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The new request carries no data, so drop the headers that
        # describe a request body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the redirect target.

        Returns None when there is no "location"/"uri" header or when
        redirect_request() returns None.  Raises HTTPError for a
        disallowed target scheme or when a redirect loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
756
757
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme.  A bare authority yields a scheme
    of None; missing userinfo yields None for both user and password.
    """
    scheme, remainder = _splittype(proxy)
    if remainder.startswith("/"):
        # URL form
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'.  When userinfo is
        # present, look for the path only after the '@' separator.
        at_pos = remainder.find('@')
        search_from = 2 if at_pos == -1 else at_pos
        slash_pos = remainder.find("/", search_from)
        if slash_pos == -1:
            authority = remainder[2:]
        else:
            authority = remainder[2:slash_pos]
    else:
        # bare authority
        scheme = None
        authority = proxy

    userinfo, hostport = _splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = _splitpasswd(userinfo)
    return scheme, user, password, hostport
790
class ProxyHandler(BaseHandler):
    """Send requests through the proxies given in a scheme -> URL mapping.

    For every key of the mapping a ``<scheme>_open`` attribute is
    installed on the instance that forwards to proxy_open().
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme_name, proxy_url in proxies.items():
            scheme_name = scheme_name.lower()
            # Bind the loop values as defaults so that each opener keeps
            # its own proxy URL and scheme.
            opener = (lambda r, proxy=proxy_url, type=scheme_name,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, scheme_name + '_open', opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; return None or a re-opened response.

        None is returned when the host is bypassed, or when the other
        handlers can carry on with the modified request.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = unquote(user) + ':' + unquote(password)
            encoded = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(hostport), proxy_type)

        if orig_type in (proxy_type, 'https'):
            # let other handlers take care of it
            return None

        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000833
class HTTPPasswordMgr:
    """Map (realm, URI prefix) to (user, password) credentials.

    URIs are stored in "reduced" form, an (authority, path) tuple as
    produced by reduce_uri().  Each URI is stored twice, with and
    without the scheme's default port, so that lookups match either
    spelling.
    """

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* at *uri* (one URI or a sequence)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                self.reduce_uri(u, default_port) for u in uri)
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for *authuri* in *realm*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the scheme's default port explicit so that
            # "http://host/" and "http://host:80/" reduce to the same key.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare on a path-segment boundary.  A plain character-wise
        # common prefix would treat "/foobar" as being below "/foo" and
        # hand the credentials to an unrelated sibling path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
896
897
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the None realm.

    Credentials registered under realm None are used whenever the
    requested realm has no match for the URI.
    """

    def find_user_password(self, realm, authuri):
        credentials = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        user, password = credentials
        if user is None:
            # Nothing for this realm: try the default (None) realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
906
907
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records an "authenticated" flag per URI.

    The flags live in ``self.authenticated``, keyed by reduced URI.
    """

    def __init__(self, *args, **kwargs):
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the flag for *uri* (a single URI or a sequence of URIs)."""
        uris = [uri] if isinstance(uri, str) else uri
        # Record each URI both with and without the default port.
        for default_port in (True, False):
            for single_uri in uris:
                key = self.reduce_uri(single_uri, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag of the first stored URI covering *authuri*.

        Returns None when no stored URI covers it.
        """
        for default_port in (True, False):
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for known_uri, flag in self.authenticated.items():
                if self.is_suburi(known_uri, reduced_authuri):
                    return flag
        return None
937
938
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic authentication.

    The concrete handler classes combine this with BaseHandler and
    supply ``auth_header``; the methods here also rely on
    ``self.parent`` to re-open a request once credentials are attached.
    """

    # Several challenges (and several auth-schemes) may be offered; the
    # first Basic challenge that carries a realm is the one used.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)' # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        """Yield (scheme, realm) for every challenge found in *header*.

        When no challenge with a realm is found, a single
        (first word of the header or '', None) pair is yielded.
        """
        # parse WWW-Authenticate header: accept multiple challenges per header
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote_char, realm = mo.groups()
            if quote_char not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials for the first usable challenge.

        Returns the result of retry_http_basic_auth(), or None when the
        *authreq* header is missing or no Basic challenge has a realm.
        Raises ValueError naming the unsupported scheme when no Basic
        challenge could be used and another scheme was offered.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            # Report the scheme that was actually rejected; the loop
            # variable may by now hold a later (even Basic) scheme.
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (unsupported,))

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open *req* with credentials for *realm*, if any are known.

        Returns None when no password is known, or when the same
        credentials are already set on the request.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                # Same credentials were already sent: give up.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Pre-emptively add Authorization for URLs flagged as authenticated.

        Only acts when the password manager provides is_authenticated().
        """
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether the request URL answered with a 2xx status."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1047
1048
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001049
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using HTTP Basic authentication."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full request URL is what gets looked up in the password
        # manager.
        return self.http_error_auth_reqed(
            'www-authenticate', req.full_url, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001059
1060
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using HTTP Basic authentication."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component
        # in authority.  Assume there isn't one, since urllib.request does
        # not (and should not, RFC 3986 s. 3.2.1) support requests for URLs
        # containing userinfo.
        return self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001074
1075
# Return n random bytes.  Alias of os.urandom; used by
# AbstractDigestAuthHandler.get_cnonce() when building the client nonce.
_randombytes = os.urandom
1078
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001079
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication.

    The concrete handler classes combine this with BaseHandler and
    supply ``auth_header``; ``self.parent`` is used to re-open a request
    once the Digest credentials are attached.
    """
    # Digest authentication is specified in RFC 2617; the SHA-256
    # algorithm name is taken from RFC 7616.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # nonce_count counts the requests sent with last_nonce.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* with Digest credentials if a Digest challenge was sent.

        Returns None when the header is absent or offers Basic.  Raises
        HTTPError once more than five retries were attempted and
        ValueError for a scheme other than Digest or Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-open *req* with an answer to the Digest challenge *auth*.

        Returns None when no answer can be built or when the same
        answer is already set on the request.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Return the Digest credentials string for challenge *chal*.

        *chal* is the parsed challenge mapping.  Returns None when the
        challenge lacks ``realm`` or ``nonce`` or when no user is known
        for the realm.  Raises URLError for a qop that does not include
        ``auth`` and ValueError for an unsupported algorithm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per RFC 2617, when the server sends "auth,auth-int", the
        # client may answer with either `auth` or `auth-int`.  We answer
        # with `auth`.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in qop.split(','):
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for the named digest *algorithm*.

        Supports 'MD5', 'SHA' (SHA-1) and 'SHA-256'; any other name
        raises ValueError.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            # Non-standard name, retained for compatibility.
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA-256':
            H = lambda x: hashlib.sha256(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1224
1225
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # Only the authority part of the request URL is handed on.
        netloc = urlparse(req.full_url)[1]
        outcome = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        # The attempt went through without raising: start counting afresh.
        self.reset_retry_count()
        return outcome
1242
1243
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using HTTP Digest authentication."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        outcome = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        # The attempt went through without raising: start counting afresh.
        self.reset_retry_count()
        return outcome
1255
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-style handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        method = request.get_method()
        return http.client.HTTPConnection._get_content_length(
            request.data, method)

    def do_request_(self, request):
        """Fill in the headers a request needs and return the request.

        Raises URLError when the request has no host and TypeError when
        the request data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        body = request.data
        if body is not None:  # POST
            if isinstance(body, str):
                raise TypeError(
                    "POST data should be bytes, an iterable of bytes, "
                    "or a file object. It cannot be of type str.")
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            already_framed = (request.has_header('Content-length')
                              or request.has_header('Transfer-encoding'))
            if not already_framed:
                # Use Content-length when the length is known, chunked
                # transfer encoding otherwise.
                length = self._get_content_length(request)
                if length is None:
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')
                else:
                    request.add_unredirected_header(
                        'Content-length', str(length))

        # Through a proxy, the Host header is taken from the selector
        # rather than from request.host.
        sel_host = host
        if request.has_proxy():
            _, remainder = _splittype(request.selector)
            sel_host, _ = _splithost(remainder)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)

        # Opener-wide default headers never override what is already set.
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        conn = http_class(host, timeout=req.timeout, **http_conn_args)
        conn.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over the regular ones.
        headers = dict(req.unredirected_hdrs)
        for key, value in req.headers.items():
            headers.setdefault(key, value)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                # Proxy-Authorization should not be sent to origin
                # server.
                tunnel_headers[proxy_auth_hdr] = headers.pop(proxy_auth_hdr)
            conn.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                conn.request(
                    req.get_method(), req.selector, req.data, headers,
                    encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            response = conn.getresponse()
        except:
            # Whatever went wrong, release the connection, then re-raise.
            conn.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if conn.sock:
            conn.sock.close()
            conn.sock = None

        response.url = req.get_full_url()
        # urllib clients expect the reason phrase in .msg, so the .msg
        # attribute of the response is overwritten with it.  It would be
        # good to deprecate this and have clients use info() or .headers
        # for the headers instead.
        response.msg = response.reason
        return response
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001372
1373
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs with http.client.HTTPConnection."""

    # Requests are prepared by the shared do_request_ implementation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = http.client.HTTPConnection
        return self.do_open(connection_class, req)
1380
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs with http.client.HTTPSConnection.

        Only defined (and exported) when http.client provides
        HTTPSConnection.
        """

        # Requests are prepared by the shared do_request_ implementation.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            # context and check_hostname are handed to the connection
            # class unchanged.
            connection_options = {
                'context': self._context,
                'check_hostname': self._check_hostname,
            }
            return self.do_open(http.client.HTTPSConnection, req,
                                **connection_options)

    __all__.append('HTTPSHandler')
Senthil Kumaran0d54eb92011-11-01 23:49:46 +08001397
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses.

    Uses the given cookie jar, or a new http.cookiejar.CookieJar when
    none is given.
    """

    def __init__(self, cookiejar=None):
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
1415
class UnknownHandler(BaseHandler):
    """Handler that rejects a request by raising URLError."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001420
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict.  A value enclosed in double quotes has the quotes
    removed; an empty value ("key=") is kept as the empty string.  An
    element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes, so that an empty value does not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1430
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    pending = []        # pieces of the element being collected
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # The character after a backslash is taken literally.
            pending.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                # The backslash itself is dropped.
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                pending.append(ch)
        elif ch == ',':
            items.append(''.join(pending))
            pending = []
        else:
            if ch == '"':
                in_quotes = True
            pending.append(ch)

    # append last part
    tail = ''.join(pending)
    if tail:
        items.append(tail)

    return [item.strip() for item in items]
1473
class FileHandler(BaseHandler):
    """Handler for file: URLs that refer to the local machine."""

    # Addresses considered to be the local host.  Stored on the class and
    # filled in on the first get_names() call.
    names = None

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open req as a local file, rejecting hosts that are not local.

        Raises URLError when the URL names a host that is neither
        'localhost' nor one of get_names().
        """
        selector = req.selector
        has_foreign_host = (selector[:2] == '//' and selector[2:3] != '/'
                            and req.host and req.host != 'localhost')
        if not has_foreign_host:
            return self.open_local_file(req)
        if req.host not in self.get_names():
            raise URLError("file:// scheme is supported only on localhost")
        # NOTE(review): a host that IS listed in get_names() reaches this
        # point and the method returns None without opening anything.
        return None

    def get_names(self):
        """Return the tuple of local addresses, resolving them on first use."""
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            loopback_addrs = socket.gethostbyname_ex('localhost')[2]
            own_addrs = socket.gethostbyname_ex(socket.gethostname())[2]
            FileHandler.names = tuple(loopback_addrs + own_addrs)
        except socket.gaierror:
            FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.selector.

        The headers carry Content-type, Content-length and Last-modified.
        Raises URLError when an OSError occurs or when the host (if any)
        is not local.
        """
        import email.utils
        import mimetypes
        host = req.host
        selector = req.selector
        local_path = url2pathname(selector)
        try:
            info = os.stat(local_path)
            last_modified = email.utils.formatdate(info.st_mtime, usegmt=True)
            content_type = mimetypes.guess_type(selector)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (content_type or 'text/plain', info.st_size, last_modified))
            port = None
            if host:
                host, port = _splitport(host)
            # Local means: no host at all, or a port-less host that
            # resolves to one of our own addresses.
            is_local = not host or (
                not port and _safe_gethostbyname(host) in self.get_names())
            if is_local:
                prefix = 'file://' + host if host else 'file://'
                return addinfourl(open(local_path, 'rb'), headers,
                                  prefix + selector)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001524
1525def _safe_gethostbyname(host):
1526 try:
1527 return socket.gethostbyname(host)
1528 except socket.gaierror:
1529 return None
1530
class FTPHandler(BaseHandler):
    """Handler that opens ftp: URLs through an ftpwrapper connection."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by req; return an addinfourl.

        Raises URLError when no host is given, when the host cannot be
        resolved, or when ftplib reports an error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as err:
            raise URLError(err)

        path, attrs = _splitattr(req.selector)
        # The last path component is the file; everything before it is
        # the directory chain.
        *dirs, file = [unquote(piece) for piece in path.split('/')]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            wrapper = self.connect_ftp(user, passwd, host, port, dirs,
                                       req.timeout)
            # Default transfer type: 'I' for a file, 'D' when no file
            # name is given.
            transfer_type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if (attr.lower() == 'type'
                        and value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = value.upper()
            fp, retrlen = wrapper.retrfile(file, transfer_type)
            header_lines = []
            content_type = mimetypes.guess_type(req.full_url)[0]
            if content_type:
                header_lines.append("Content-type: %s\n" % content_type)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new non-persistent ftpwrapper for the given target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001588
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a cache for reuse.

    Entries are keyed by (user, host, port, directory path, timeout).
    Each entry has an expiry time of "last use + self.delay" seconds, and
    one entry is evicted whenever the cache size reaches self.max_conns.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}       # key -> ftpwrapper
        self.timeout = {}     # key -> expiry time (time.time() based)
        self.soonest = 0      # smallest expiry time currently in self.timeout
        self.delay = 60       # seconds an entry stays valid after use
        self.max_conns = 16   # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds a cached connection stays valid."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached ftpwrapper for this target, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        # Creating or reusing an entry both refresh its expiry time.
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close and drop expired entries, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # default=0 keeps this from raising ValueError when every entry
        # has just been dropped; 0 is also the initial value of soonest.
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection instead of merely
                    # forgetting it, as the expiry branch above and
                    # clear_cache() already do.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1641
class DataHandler(BaseHandler):
    """Handler for data: URLs."""

    def data_open(self, req):
        """Decode the payload of a data: URL and return it as an addinfourl.

        The response headers carry Content-type and Content-length.
        """
        # data URLs as specified in RFC 2397.
        #
        # ignores POSTed data
        #
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        base64_marker = ";base64"
        url = req.full_url

        scheme, remainder = url.split(":", 1)
        mediatype, payload = remainder.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(payload)
        if mediatype.endswith(base64_marker):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(base64_marker)]

        mediatype = mediatype or "text/plain;charset=US-ASCII"

        header_text = ("Content-type: %s\nContent-length: %d\n" %
                       (mediatype, len(payload)))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, url)
1671
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +02001672
# Code moved from the old urllib module
1674
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name != 'nt':
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
else:
    # Windows uses the implementations from nturl2path instead.
    from nturl2path import url2pathname, pathname2url


# FTP connection cache that URLopener instances use by default.
ftpcache = {}
Senthil Kumarana2a9ddd2017-04-08 23:27:25 -07001693
1694
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() is safe even if __init__ never
    # got as far as creating the per-instance list.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, client certificate files and default headers.

        proxies must be a mapping; when it is None, getproxies() supplies
        it.  The only keyword arguments read from x509 are 'key_file' and
        'cert_file'.  Emits a DeprecationWarning on every construction.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        """Run close() when the opener is garbage-collected."""
        self.close()

    def close(self):
        """Release resources held by the opener; delegates to cleanup()."""
        self.cleanup()

    def cleanup(self):
        """Delete the temporary files made by retrieve() and clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort: a file that cannot be removed is skipped.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        # Serve from the retrieve() cache when it is enabled and has the URL.
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            # From here on the scheme is the proxy's scheme, and url
            # becomes a (proxy host, full URL) pair.
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to the method named open_<scheme>, with '-' mapped to '_'.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # open_local_file is never reachable through a URL scheme.
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            # Already the module's own error types: pass through unchanged.
            raise
        except OSError as msg:
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL whose proxy scheme is unknown."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        When reporthook is given it is called as
        reporthook(blocknum, blocksize, totalsize) once before the first
        read and once after each block; totalsize is -1 when the response
        has no Content-Length header.  Raises ContentTooShortError when
        fewer bytes arrive than Content-Length announced.
        """
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        # A local file with no explicit target is not copied: return its
        # own path.  On OSError fall through to the generic path below.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: create a temporary file that keeps the
                # URL path's extension, and record it for cleanup().
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: url is "//host/selector".
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: url is the (proxy host, full URL) pair
            # built by open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers from self.addheaders are applied last, so they override
        # any header of the same name set above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    # HTTPS support is defined only when _have_ssl is true.
    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection using this opener's key/cert files."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        # A non-str url is the (proxy host, full URL) pair built by open().
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = _splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = _splitport(host)
        # A host is accepted only without a port and only when it
        # resolves to localhost() or one of thishost().
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        # A non-str url is the (proxy host, full URL) pair built by open().
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        # The last component is the file name; the rest is the directory
        # chain.
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' when no file name is given,
            # otherwise 'I'; a ";type=" attribute overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        # A non-str url is the (proxy host, full URL) pair built by open().
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";token" with no '=' is taken as the encoding
        # (e.g. ";base64"), not as a media type parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build a message (headers, blank line, payload) as text.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002138
2139
class FancyURLopener(URLopener):
    """URLopener subclass with handlers for a few HTTP error statuses.

    Redirects (301, 302, 303, 307) are followed, 401/407 responses can be
    retried with Basic credentials, and http_error_default() hands the error
    response back to the caller instead of raising.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # "realm@host" -> (user, passwd)
        self.tries = 0          # redirects followed in the current chain
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Return the error response wrapped in an addinfourl object."""
        full_url = "http:" + url
        return addinfourl(fp, headers, full_url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- follow a temporary relocation.

        Once the redirect counter reaches ``maxtries`` the response is
        reported through http_error_500 (when the instance has one) or
        http_error_default as a 500 "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            limit_reached = self.maxtries and self.tries >= self.maxtries
            if not limit_reached:
                return self.redirect_internal(url, fp, errcode, errmsg,
                                              headers, data)
            handler = getattr(self, "http_error_500",
                              self.http_error_default)
            return handler(url, fp, 500,
                           "Internal Server Error: Redirect Recursion",
                           headers)
        finally:
            # The counter is reset whenever a handler invocation finishes.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by the response headers.

        Returns None when neither a ``location`` nor a ``uri`` header is
        present.  Raises HTTPError when the target scheme is not http,
        https, ftp or empty.
        """
        for field in ('location', 'uri'):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return
        fp.close()

        # The server may have sent a relative URL; resolve it against the
        # URL that was originally requested.
        newurl = urljoin(self.type + ":" + url, newurl)

        # For security reasons only a fixed set of schemes may be the
        # target of a redirect.
        allowed_schemes = ('http', 'https', 'ftp', '')
        if urlparse(newurl).scheme in allowed_schemes:
            return self.open(newurl)

        raise HTTPError(newurl, errcode,
                        errmsg +
                        " Redirection to url '%s' is not allowed." % newurl,
                        headers, fp)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- permanent relocation, handled exactly like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- handled exactly like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- follow the redirect unless request data was sent."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def _basic_auth_realm(self, header, url, fp, errcode, errmsg, headers,
                          retry):
        """Check the Basic challenge in *header* and return its realm.

        URLopener.http_error_default() is invoked when the header is
        missing or unparsable, when the scheme is not Basic, and when
        *retry* is false.
        """
        # NOTE(review): the statements after each URLopener call assume
        # that call does not return normally (presumably it raises) --
        # confirm against URLopener.http_error_default.
        if header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers[header]
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        return realm

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_auth_realm('www-authenticate', url, fp, errcode,
                                       errmsg, headers, retry)
        # Dispatch on the opener's current URL type, e.g.
        # retry_http_basic_auth.
        retry_method = getattr(self, 'retry_' + self.type + '_basic_auth')
        if data is None:
            return retry_method(url, realm)
        return retry_method(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_auth_realm('proxy-authenticate', url, fp, errcode,
                                       errmsg, headers, retry)
        retry_method = getattr(self,
                               'retry_proxy_' + self.type + '_basic_auth')
        if data is None:
            return retry_method(url, realm)
        return retry_method(url, realm, data)

    def _retry_proxy_with_credentials(self, scheme, url, realm, data):
        """Store credentials in the *scheme* proxy URL and reopen *url*.

        Returns None when no user name or password could be obtained.
        """
        host, selector = _splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        # A non-zero offset means the proxy URL already carried userinfo;
        # it is passed on as the clear_cache flag.
        userinfo_end = proxyhost.find('@') + 1
        proxyhost = proxyhost[userinfo_end:]
        user, passwd = self.get_user_passwd(proxyhost, realm, userinfo_end)
        if not (user or passwd):
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def _retry_with_credentials(self, scheme, url, realm, data):
        """Reopen *url* with credentials embedded in its host part.

        Returns None when no user name or password could be obtained.
        """
        host, selector = _splithost(url)
        # A non-zero offset means the URL already carried userinfo; it is
        # passed on as the clear_cache flag.
        userinfo_end = host.find('@') + 1
        host = host[userinfo_end:]
        user, passwd = self.get_user_passwd(host, realm, userinfo_end)
        if not (user or passwd):
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407 Basic challenge."""
        return self._retry_proxy_with_credentials('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407 Basic challenge."""
        return self._retry_proxy_with_credentials('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401 Basic challenge."""
        return self._retry_with_credentials('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401 Basic challenge."""
        return self._retry_with_credentials('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*, prompting if needed.

        A cached pair is returned unless *clear_cache* is true, in which
        case the cached entry is dropped and the user is prompted again.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[key]
            del self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Ask for credentials on the terminal; override in a GUI.

        Returns (None, None) when the prompt is interrupted with Ctrl-C.
        """
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
2350
2351
2352# Utility functions
2353
_localhost = None
def localhost():
    """Return the address that 'localhost' resolves to.

    The lookup is performed on the first call only; the result is kept in
    the module-level _localhost cache.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2361
_thishost = None
def thishost():
    """Return a tuple with the IP addresses of the current host.

    The addresses come from resolving socket.gethostname(); if that raises
    socket.gaierror, 'localhost' is resolved instead.  The tuple is computed
    once and cached in the module-level _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2372
_ftperrors = None
def ftperrors():
    """Return ftplib.all_errors, importing ftplib on first use.

    The value is cached in the module-level _ftperrors so that ftplib is
    only imported when FTP support is actually needed.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2381
_noheaders = None
def noheaders():
    """Return a shared email Message object parsed from an empty string.

    The same object is returned on every call, so callers share it.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2389
2390
2391# Utility classes
2392
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Creating an instance connects, logs in and changes to the directory
    given by *dirs*.  ``refcount`` counts the file objects handed out by
    retrfile() that have not been closed yet; the FTP connection itself is
    closed only when no such file object remains and either close() has
    been called or the wrapper was created with persistent=False.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except BaseException:
            # Release the connection on any failure, then let the original
            # error propagate unchanged.
            self.close()
            raise

    def init(self):
        """(Re)connect, log in and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* and return ``(fileobj, retrlen)``.

        *type* is the FTP transfer type; 'd' or 'D' requests a directory
        listing.  When RETR is refused with a 550 reply, or *file* is empty,
        a LIST is issued instead.  ``retrlen`` is the second item returned
        by ftplib's ntransfercmd().

        Raises URLError (chained to the ftplib error) when RETR fails with
        a permanent error other than 550, or when *file* cannot be entered
        as a directory before listing it.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection may have gone stale: reconnect once and
            # repeat the command.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason) from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the returned file object is closed.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the wrapper as having no transfer in progress."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it once unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP-level errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2486
2487# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The environment is scanned for variables named <scheme>_proxy, in any
    letter case.  When both spellings exist, the all-lowercase suffix form
    wins, and an empty lowercase variable removes the scheme altogether.
    If you need a different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    cut = -len(suffix)
    proxies = {}

    # First pass: accept the variable in any letter case.
    for raw_name, value in os.environ.items():
        lowered = raw_name.lower()
        if lowered.endswith(suffix) and value:
            proxies[lowered[:cut]] = value

    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client.  A lowercase http_proxy is still honoured by
    # the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)

    # Second pass: variables whose suffix is already lowercase override
    # whatever the first pass collected.
    for raw_name, value in os.environ.items():
        if not raw_name.endswith(suffix):
            continue
        scheme = raw_name.lower()[:cut]
        if value:
            proxies[scheme] = value
        else:
            proxies.pop(scheme, None)
    return proxies
2518
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    The 'no' entry of the proxy dict (i.e. the no_proxy variable) is
    expected to be a comma separated list of DNS suffixes, or '*' to bypass
    the proxy for every host.  When *proxies* is None the dict is taken
    from getproxies_environment().

    """
    if proxies is None:
        proxies = getproxies_environment()
    # Without a no_proxy setting nothing is bypassed.
    try:
        no_proxy = proxies['no']
    except KeyError:
        return False
    # '*' means: always bypass.
    if no_proxy == '*':
        return True

    host = host.lower()
    # Compare both with and without the port.
    hostonly = _splitport(host)[0]
    candidates = (hostonly, host)

    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        # Leading dots are ignored; matching is case-insensitive.
        domain = entry.lstrip('.').lower()
        if domain in candidates:
            return True
        dotted = '.' + domain
        if any(candidate.endswith(dotted) for candidate in candidates):
            return True
    # No suffix matched: use the proxy.
    return False
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002552
2553
Ronald Oussorene72e1612011-03-14 18:15:25 -04002554# This code tests an OSX specific data structure but is testable on all
2555# platforms
2556def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2557 """
2558 Return True iff this host shouldn't be accessed using a proxy
2559
2560 This function uses the MacOSX framework SystemConfiguration
2561 to fetch the proxy information.
2562
2563 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2564 { 'exclude_simple': bool,
2565 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2566 }
2567 """
Ronald Oussorene72e1612011-03-14 18:15:25 -04002568 from fnmatch import fnmatch
2569
Cheryl Sabella0250de42018-04-25 16:51:54 -07002570 hostonly, port = _splitport(host)
Ronald Oussorene72e1612011-03-14 18:15:25 -04002571
2572 def ip2num(ipAddr):
2573 parts = ipAddr.split('.')
2574 parts = list(map(int, parts))
2575 if len(parts) != 4:
2576 parts = (parts + [0, 0, 0, 0])[:4]
2577 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2578
2579 # Check for simple host names:
2580 if '.' not in host:
2581 if proxy_settings['exclude_simple']:
2582 return True
2583
2584 hostIP = None
2585
2586 for value in proxy_settings.get('exceptions', ()):
2587 # Items in the list are strings like these: *.local, 169.254/16
2588 if not value: continue
2589
2590 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2591 if m is not None:
2592 if hostIP is None:
2593 try:
2594 hostIP = socket.gethostbyname(hostonly)
2595 hostIP = ip2num(hostIP)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002596 except OSError:
Ronald Oussorene72e1612011-03-14 18:15:25 -04002597 continue
2598
2599 base = ip2num(m.group(1))
2600 mask = m.group(2)
2601 if mask is None:
2602 mask = 8 * (m.group(1).count('.') + 1)
2603 else:
2604 mask = int(mask[1:])
Ronald Oussoren93a1cca2020-10-19 20:16:21 +02002605
2606 if mask < 0 or mask > 32:
2607 # System libraries ignore invalid prefix lengths
2608 continue
2609
Ronald Oussorene72e1612011-03-14 18:15:25 -04002610 mask = 32 - mask
2611
2612 if (hostIP >> mask) == (base >> mask):
2613 return True
2614
2615 elif fnmatch(host, value):
2616 return True
2617
2618 return False
2619
2620
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002621if sys.platform == 'darwin':
Ronald Oussoren84151202010-04-18 20:46:11 +00002622 from _scproxy import _get_proxy_settings, _get_proxies
2623
def proxy_bypass_macosx_sysconf(host):
    """Apply the current _scproxy proxy settings to *host*.

    Returns the result of _proxy_bypass_macosx_sysconf() for the settings
    reported by _get_proxy_settings().
    """
    return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())
Ronald Oussoren84151202010-04-18 20:46:11 +00002627
2628 def getproxies_macosx_sysconf():
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002629 """Return a dictionary of scheme -> proxy server URL mappings.
2630
Ronald Oussoren84151202010-04-18 20:46:11 +00002631 This function uses the MacOSX framework SystemConfiguration
2632 to fetch the proxy information.
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002633 """
Ronald Oussoren84151202010-04-18 20:46:11 +00002634 return _get_proxies()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002635
Ronald Oussoren84151202010-04-18 20:46:11 +00002636
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002637
2638 def proxy_bypass(host):
Senthil Kumarana7c0ff22016-04-25 08:16:23 -07002639 """Return True, if host should be bypassed.
2640
2641 Checks proxy settings gathered from the environment, if specified,
2642 or from the MacOSX framework SystemConfiguration.
2643
2644 """
2645 proxies = getproxies_environment()
2646 if proxies:
2647 return proxy_bypass_environment(host, proxies)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002648 else:
Ronald Oussoren84151202010-04-18 20:46:11 +00002649 return proxy_bypass_macosx_sysconf(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002650
2651 def getproxies():
Ronald Oussoren84151202010-04-18 20:46:11 +00002652 return getproxies_environment() or getproxies_macosx_sysconf()
2653
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002654
2655elif os.name == 'nt':
2656 def getproxies_registry():
2657 """Return a dictionary of scheme -> proxy server URL mappings.
2658
2659 Win32 uses the registry to store proxies.
2660
2661 """
2662 proxies = {}
2663 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002664 import winreg
Brett Cannoncd171c82013-07-04 17:43:24 -04002665 except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002666 # Std module, so should be around - but you never know!
2667 return proxies
2668 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002669 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002670 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002671 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002672 'ProxyEnable')[0]
2673 if proxyEnable:
2674 # Returned as Unicode but problems if not converted to ASCII
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002675 proxyServer = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002676 'ProxyServer')[0])
2677 if '=' in proxyServer:
2678 # Per-protocol settings
2679 for p in proxyServer.split(';'):
2680 protocol, address = p.split('=', 1)
2681 # See if address has a type:// prefix
Serhiy Storchaka6a265f02020-01-05 14:14:31 +02002682 if not re.match('(?:[^/:]+)://', address):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002683 address = '%s://%s' % (protocol, address)
2684 proxies[protocol] = address
2685 else:
2686 # Use one setting for all protocols
2687 if proxyServer[:5] == 'http:':
2688 proxies['http'] = proxyServer
2689 else:
2690 proxies['http'] = 'http://%s' % proxyServer
Senthil Kumaran04f31b82010-07-14 20:10:52 +00002691 proxies['https'] = 'https://%s' % proxyServer
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002692 proxies['ftp'] = 'ftp://%s' % proxyServer
2693 internetSettings.Close()
Andrew Svetlov2606a6f2012-12-19 14:33:35 +02002694 except (OSError, ValueError, TypeError):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002695 # Either registry key not found etc, or the value in an
2696 # unexpected format.
2697 # proxies already set up to be empty so nothing to do
2698 pass
2699 return proxies
2700
2701 def getproxies():
2702 """Return a dictionary of scheme -> proxy server URL mappings.
2703
2704 Returns settings gathered from the environment, if specified,
2705 or the registry.
2706
2707 """
2708 return getproxies_environment() or getproxies_registry()
2709
2710 def proxy_bypass_registry(host):
2711 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002712 import winreg
Brett Cannoncd171c82013-07-04 17:43:24 -04002713 except ImportError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002714 # Std modules, so should be around - but you never know!
2715 return 0
2716 try:
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002717 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002718 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002719 proxyEnable = winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002720 'ProxyEnable')[0]
Georg Brandl4ed72ac2009-04-01 04:28:33 +00002721 proxyOverride = str(winreg.QueryValueEx(internetSettings,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002722 'ProxyOverride')[0])
2723 # ^^^^ Returned as Unicode but problems if not converted to ASCII
Andrew Svetlov2606a6f2012-12-19 14:33:35 +02002724 except OSError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002725 return 0
2726 if not proxyEnable or not proxyOverride:
2727 return 0
2728 # try to make a host list from name and IP address.
Cheryl Sabella0250de42018-04-25 16:51:54 -07002729 rawHost, port = _splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002730 host = [rawHost]
2731 try:
2732 addr = socket.gethostbyname(rawHost)
2733 if addr != rawHost:
2734 host.append(addr)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002735 except OSError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002736 pass
2737 try:
2738 fqdn = socket.getfqdn(rawHost)
2739 if fqdn != rawHost:
2740 host.append(fqdn)
Andrew Svetlov0832af62012-12-18 23:10:48 +02002741 except OSError:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002742 pass
2743 # make a check value list from the registry entry: replace the
2744 # '<local>' string by the localhost entry and the corresponding
2745 # canonical entry.
2746 proxyOverride = proxyOverride.split(';')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002747 # now check if we match one of the registry values.
2748 for test in proxyOverride:
Senthil Kumaran49476062009-05-01 06:00:23 +00002749 if test == '<local>':
2750 if '.' not in rawHost:
2751 return 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002752 test = test.replace(".", r"\.") # mask dots
2753 test = test.replace("*", r".*") # change glob sequence
2754 test = test.replace("?", r".") # change glob char
2755 for val in host:
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002756 if re.match(test, val, re.I):
2757 return 1
2758 return 0
2759
2760 def proxy_bypass(host):
Senthil Kumarana7c0ff22016-04-25 08:16:23 -07002761 """Return True, if host should be bypassed.
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002762
Senthil Kumarana7c0ff22016-04-25 08:16:23 -07002763 Checks proxy settings gathered from the environment, if specified,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002764 or the registry.
2765
2766 """
Senthil Kumarana7c0ff22016-04-25 08:16:23 -07002767 proxies = getproxies_environment()
2768 if proxies:
2769 return proxy_bypass_environment(host, proxies)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002770 else:
2771 return proxy_bypass_registry(host)
2772
2773else:
# Neither macOS nor Windows: the *_proxy environment variables are the only
# source of proxy settings.
getproxies, proxy_bypass = getproxies_environment, proxy_bypass_environment