# Issues in merging urllib and urllib2:
# 1. They both define a function named urlopen()

"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as the original
urllib: pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.
objects of interest:
OpenerDirector --

Request -- An object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""

# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails, how should the error be signalled?  The client needs to
# know the HTTP error code.  But if the handler knows what the problem
# was, e.g., that it didn't support the hash algorithm requested in
# the challenge, it would be good to pass that information along to
# the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener

import base64
import email
import hashlib
import http.client
import io
import os
import posixpath
import random
import re
import socket
import sys
import time
import urllib.parse, urllib.error, urllib.response
import bisect

from io import StringIO

# check for SSL
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

# used in User-Agent header sent
__version__ = sys.version[:3]

_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    global _opener
    if _opener is None:
        _opener = build_opener()
    return _opener.open(url, data, timeout)

def install_opener(opener):
    global _opener
    _opener = opener

# TODO(jhylton): Make this work with the same global opener.
_urlopener = None
def urlretrieve(url, filename=None, reporthook=None, data=None):
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

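# A usage sketch (URL is illustrative): download a resource to a local file
# while reporting progress.  The hook receives the block count so far, the
# block size, and the total size from Content-Length (-1 if unknown).
#
#     def report(blocknum, bs, size):
#         print(blocknum * bs, "bytes of", size)
#
#     filename, headers = urlretrieve('http://www.python.org/',
#                                     reporthook=report)
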
def urlcleanup():
    if _urlopener:
        _urlopener.cleanup()
    global _opener
    if _opener:
        _opener = None

# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urllib.parse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()

class Request:

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = urllib.parse.unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        if self.type is None:
            self.type, self.__r_type = urllib.parse.splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        if self.host is None:
            self.host, self.__r_host = urllib.parse.splithost(self.__r_type)
            if self.host:
                self.host = urllib.parse.unquote(self.host)
        return self.host

    def get_selector(self):
        return self.__r_host

    def set_proxy(self, host, type):
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())

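# A construction sketch (host and header value are illustrative): a Request
# carries the URL, optional POST data, and any extra headers.
#
#     req = Request('http://www.example.com/',
#                   headers={'User-agent': 'MyClient/1.0'})
#     req.get_method()    # 'GET' (would be 'POST' if data were given)
#     req.get_host()      # 'www.example.com'
#     req.header_items()  # [('User-agent', 'MyClient/1.0')]
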
class OpenerDirector:
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in a specific order; the order
            # is specified in a Handler attribute
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

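    # Handler methods are discovered by name: <protocol>_open,
    # <protocol>_request, <protocol>_response and <protocol>_error_<code>
    # are registered automatically.  A sketch of a custom pre-processor
    # (the header value is illustrative):
    #
    #     class UserAgentHandler(BaseHandler):
    #         def http_request(self, req):
    #             req.add_header('User-agent', 'MyClient/1.0')
    #             return req
    #
    #     opener = build_opener(UserAgentHandler)
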
    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http']  # https is no different from http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)

# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener

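# A sketch of the displacement rule above: passing a handler instance (or
# subclass) of a default handler replaces that default.  Here a debugging
# HTTPHandler stands in for the stock one.
#
#     opener = build_opener(HTTPHandler(debuglevel=1))
#     install_opener(opener)
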
class BaseHandler:
    handler_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order


class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response

class HTTPDefaultErrorHandler(BaseHandler):
    def http_error_default(self, req, fp, code, msg, hdrs):
        raise urllib.error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

class HTTPRedirectHandler(BaseHandler):
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.get_full_url(),
                                         code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # Be lenient with URIs containing a space.
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return
        newurl = urllib.parse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise urllib.error.HTTPError(req.get_full_url(), code,
                                             self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

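# A customization sketch: redirect_request is the hook to override.  This
# hypothetical subclass refuses to follow any redirect by raising HTTPError.
#
#     class NoRedirectHandler(HTTPRedirectHandler):
#         def redirect_request(self, req, fp, code, msg, headers, newurl):
#             raise urllib.error.HTTPError(req.get_full_url(), code, msg,
#                                          headers, fp)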

def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = urllib.parse.splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2
        # and 3.3), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = urllib.parse.splituser(authority)
    if userinfo is not None:
        user, password = urllib.parse.splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport

class ProxyHandler(BaseHandler):
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (urllib.parse.unquote(user),
                                   urllib.parse.unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = urllib.parse.unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)

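# A usage sketch: an explicit mapping routes each scheme through a proxy
# (the host is illustrative), while an empty mapping disables proxies
# entirely, including any picked up from the environment by getproxies().
#
#     proxy_handler = ProxyHandler({'http': 'http://proxy.example.com:3128'})
#     no_proxies = ProxyHandler({})
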
class HTTPPasswordMgr:

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urllib.parse.urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = urllib.parse.splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False

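# A lookup sketch (realm, URI and credentials are illustrative): passwords
# registered for a URI also match any URI below it in the path tree, with or
# without the scheme's default port.
#
#     mgr = HTTPPasswordMgr()
#     mgr.add_password('Realm', 'http://example.com/private/', 'joe', 'secret')
#     mgr.find_user_password('Realm', 'http://example.com/private/doc.html')
#     # -> ('joe', 'secret')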

class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):

    def find_user_password(self, realm, authuri):
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is not None:
            return user, password
        return HTTPPasswordMgr.find_user_password(self, None, authuri)

class AbstractBasicAuthHandler:

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None


class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        url = req.get_full_url()
        return self.http_error_auth_reqed('www-authenticate',
                                          url, req, headers)

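# A wiring sketch (URL and credentials are illustrative): pair the handler
# with a password manager, then build an opener around it.
#
#     mgr = HTTPPasswordMgrWithDefaultRealm()
#     mgr.add_password(None, 'http://example.com/', 'joe', 'secret')
#     opener = build_opener(HTTPBasicAuthHandler(mgr))
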
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        authority = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          authority, req, headers)


def randombytes(n):
    """Return n random bytes."""
    return os.urandom(n)

class AbstractDigestAuthHandler:
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX Support for qop="auth-int" is shaky.

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time.  Hm.  Unless the Password Manager is
            # prompting for the information.  Crap.  This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise urllib.error.HTTPError(req.get_full_url(), 401,
                                         "digest auth failed",
                                         headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise urllib.error.URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # unknown algorithm; the caller treats H is None as
            # "unsupported" and gives up on the challenge
            H = None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None


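# A worked sketch of the RFC 2617 response computation above, for the common
# MD5 + qop=auth case (all values illustrative):
#
#     H  = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
#     KD = lambda s, d: H("%s:%s" % (s, d))
#     A1 = "user:realm:password"
#     A2 = "GET:/index.html"
#     response = KD(H(A1), "nonce:00000001:cnonce:auth:" + H(A2))
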
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        host = urllib.parse.urlparse(req.get_full_url())[1]
        retry = self.http_error_auth_reqed('www-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry


class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        host = req.get_host()
        retry = self.http_error_auth_reqed('proxy-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry

class AbstractHTTPHandler(BaseHandler):

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        host = request.get_host()
        if not host:
            raise urllib.error.URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        scheme, sel = urllib.parse.splittype(request.get_selector())
        sel_host, sel_path = urllib.parse.splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return the headers as an email.message.Message instance
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise urllib.error.URLError('no host given')

        h = http_class(host, timeout=req.timeout)  # will parse host:port
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err:  # XXX what error?
            raise urllib.error.URLError(err)

        resp = urllib.response.addinfourl(r.fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp


class HTTPHandler(AbstractHTTPHandler):

    def http_open(self, req):
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_

if hasattr(http.client, 'HTTPSConnection'):
    class HTTPSHandler(AbstractHTTPHandler):

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_

class HTTPCookieProcessor(BaseHandler):
    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response

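# A usage sketch: route all requests through one CookieJar so cookies set by
# a server are replayed on later requests in the same opener.
#
#     import http.cookiejar
#     jar = http.cookiejar.CookieJar()
#     opener = build_opener(HTTPCookieProcessor(jar))
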
class UnknownHandler(BaseHandler):
    def unknown_open(self, req):
        type = req.get_type()
        raise urllib.error.URLError('unknown url type: %s' % type)

def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated."""
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        if v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed

def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    res = []
    part = ''

    escape = quote = False
    for cur in s:
        if escape:
            part += cur
            escape = False
            continue
        if quote:
            if cur == '\\':
                escape = True
                continue
            elif cur == '"':
                quote = False
            part += cur
            continue

        if cur == ',':
            res.append(part)
            part = ''
            continue

        if cur == '"':
            quote = True

        part += cur

    # append last part
    if part:
        res.append(part)

    return [part.strip() for part in res]

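# A parsing sketch (values illustrative): together these two functions split
# a challenge such as a Digest header into a dict.
#
#     items = parse_http_list('realm="x@y", qop="auth,auth-int", stale=FALSE')
#     # ['realm="x@y"', 'qop="auth,auth-int"', 'stale=FALSE']
#     parse_keqv_list(items)
#     # {'realm': 'x@y', 'qop': 'auth,auth-int', 'stale': 'FALSE'}
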
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        if FileHandler.names is None:
            try:
                FileHandler.names = (socket.gethostbyname('localhost'),
                                     socket.gethostbyname(socket.gethostname()))
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        import email.utils
        import mimetypes
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(file)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = urllib.parse.splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                return urllib.response.addinfourl(open(localfile, 'rb'),
                                                  headers, 'file:'+file)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise urllib.error.URLError(msg)
        raise urllib.error.URLError('file not on local host')

def _safe_gethostbyname(host):
    try:
        return socket.gethostbyname(host)
    except socket.gaierror:
        return None

class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise urllib.error.URLError('ftp error: no host given')
        host, port = urllib.parse.splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = urllib.parse.splituser(host)
        if user:
            user, passwd = urllib.parse.splitpasswd(user)
        else:
            passwd = None
        host = urllib.parse.unquote(host)
        user = urllib.parse.unquote(user or '')
        passwd = urllib.parse.unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise urllib.error.URLError(msg)
        path, attrs = urllib.parse.splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = list(map(urllib.parse.unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = urllib.parse.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return urllib.response.addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            exc = urllib.error.URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
        return fw

class CacheFTPHandler(FTPHandler):
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # guard against an empty cache after expiry
            self.soonest = min(self.timeout.values()) if self.timeout else 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values()) if self.timeout else 0

# Code moved from the old urllib module

MAXFTPCACHE = 10  # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return urllib.parse.unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return urllib.parse.quote(pathname)

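# A round-trip sketch (POSIX flavor, path is illustrative):
#
#     pathname2url('/tmp/a b.txt')    # '/tmp/a%20b.txt'
#     url2pathname('/tmp/a%20b.txt')  # '/tmp/a b.txt'
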
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = urllib.parse.unwrap(urllib.parse.toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return urllib.response.addinfourl(fp, headers, fullurl)
        urltype, url = urllib.parse.splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = urllib.parse.splittype(proxy)
            host, selector = urllib.parse.splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = urllib.parse.unwrap(urllib.parse.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.parse.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(urllib.parse.splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.parse.splittype(url)
            garbage, path = urllib.parse.splithost(path or "")
            path, garbage = urllib.parse.splitquery(path or "")
            path, garbage = urllib.parse.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        bs = 1024*8
        size = -1
        read = 0
        blocknum = 0
        if reporthook:
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.error.ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result


    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the URL to retrieve, or a (host, relative-path) pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = urllib.parse.splithost(url)
            if host:
                user_passwd, host = urllib.parse.splituser(host)
                host = urllib.parse.unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = urllib.parse.splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = urllib.parse.splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = urllib.parse.splithost(rest)
                if realhost:
                    user_passwd, realhost = urllib.parse.splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print("proxy via http:", host, selector)
        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise urllib.error.URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return urllib.response.addinfourl(response.fp, response.msg,
                                              "http:" + url,
                                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        void = fp.read()
        fp.close()
        raise urllib.error.HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise urllib.error.URLError('file error: proxy support for '
                                        'file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, email.utils
        host, file = urllib.parse.splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise urllib.error.URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        host, port = urllib.parse.splitport(host)
        if (not port
            and socket.gethostbyname(host) in (localhost(), thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        raise urllib.error.URLError('local file error: not on local host')

1680 def open_ftp(self, url):
1681 """Use FTP protocol."""
1682 if not isinstance(url, str):
1683 raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
1684 import mimetypes
1685 from io import StringIO
1686 host, path = urllib.parse.splithost(url)
1687 if not host: raise URLError('ftp error', 'no host given')
1688 host, port = urllib.parse.splitport(host)
1689 user, host = urllib.parse.splituser(host)
1690 if user: user, passwd = urllib.parse.splitpasswd(user)
1691 else: passwd = None
1692 host = urllib.parse.unquote(host)
1693 user = urllib.parse.unquote(user or '')
1694 passwd = urllib.parse.unquote(passwd or '')
1695 host = socket.gethostbyname(host)
1696 if not port:
1697 import ftplib
1698 port = ftplib.FTP_PORT
1699 else:
1700 port = int(port)
1701 path, attrs = urllib.parse.splitattr(path)
1702 path = urllib.parse.unquote(path)
1703 dirs = path.split('/')
1704 dirs, file = dirs[:-1], dirs[-1]
1705 if dirs and not dirs[0]: dirs = dirs[1:]
1706 if dirs and not dirs[0]: dirs[0] = '/'
1707 key = user, host, port, '/'.join(dirs)
1708 # XXX thread unsafe!
1709 if len(self.ftpcache) > MAXFTPCACHE:
1710 # Prune the cache, rather arbitrarily
1711 for k in self.ftpcache.keys():
1712 if k != key:
1713 v = self.ftpcache[k]
1714 del self.ftpcache[k]
1715 v.close()
1716 try:
1717 if not key in self.ftpcache:
1718 self.ftpcache[key] = \
1719 ftpwrapper(user, passwd, host, port, dirs)
1720 if not file: type = 'D'
1721 else: type = 'I'
1722 for attr in attrs:
1723 attr, value = urllib.parse.splitvalue(attr)
1724 if attr.lower() == 'type' and \
1725 value in ('a', 'A', 'i', 'I', 'd', 'D'):
1726 type = value.upper()
1727 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
1728 mtype = mimetypes.guess_type("ftp:" + url)[0]
1729 headers = ""
1730 if mtype:
1731 headers += "Content-Type: %s\n" % mtype
1732 if retrlen is not None and retrlen >= 0:
1733 headers += "Content-Length: %d\n" % retrlen
1734 headers = email.message_from_string(headers)
1735 return urllib.response.addinfourl(fp, headers, "ftp:" + url)
1736 except ftperrors() as msg:
1737 raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
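
    # Editorial note: the ';type=' attribute parsed above selects the FTP
    # transfer mode.  For example (hypothetical URL),
    #     ftp://ftp.example.com/pub/notes.txt;type=a
    # forces ASCII mode ('a'/'A'), 'i'/'I' forces image (binary) mode, and
    # 'd'/'D' requests a directory listing; without it, a URL naming a file
    # defaults to binary and a URL ending in '/' to a listing.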

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise urllib.error.URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        from io import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            # decodestring() operates on bytes; round-trip through latin-1
            # so the decoded payload stays a str for the StringIO below
            data = base64.decodestring(data.encode('ascii')).decode('latin-1')
        else:
            data = urllib.parse.unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        headers = email.message_from_string('\n'.join(msg))
        # wrap the decoded payload in a file-like object for addinfourl
        f = StringIO(data)
        return urllib.response.addinfourl(f, headers, url)
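
# Editorial sketch (hypothetical helper, not part of the original module):
# what open_data() does with an RFC 2397 URL, end to end.
def _example_data_url():
    import base64
    url = 'data:text/plain;base64,SGVsbG8sIHdvcmxkIQ=='
    # split the mediatype from the payload, as open_data() does after
    # the scheme has been stripped
    type, data = url[len('data:'):].split(',', 1)
    assert type == 'text/plain;base64'
    return base64.decodestring(data.encode('ascii'))    # b'Hello, world!'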


class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return urllib.response.addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = urllib.parse.urljoin(self.type + ":" + url, newurl)
        return self.open(newurl)
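
    # Editorial sketch of the join above (values are hypothetical): with
    # self.type == 'http', url == '//www.example.com/a/b' and a relative
    # Location header of 'c', urljoin() yields
    #     'http://www.example.com/a/c'
    # while an absolute Location replaces the original URL outright.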

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)
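
    # Editorial sketch: the regular expression above accepts a challenge
    # such as (hypothetical header value)
    #     WWW-Authenticate: Basic realm="secret files"
    # yielding scheme == 'Basic' and realm == 'secret files'; any scheme
    # other than Basic falls back to the default error handling.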

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''),
                                  proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''),
                                  proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
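
    # Editorial sketch of the credential embedding above (hypothetical
    # values): quote(..., safe='') percent-escapes every reserved byte,
    # so user 'joe' with password 'p@ss/word' yields
    #     'http://joe:p%40ss%2Fword@www.example.com/index.html'
    # keeping the '@' and '/' of the password out of the URL structure.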

    def get_user_passwd(self, host, realm, clear_cache=0):
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
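
# Editorial sketch (hypothetical subclass, not part of the original
# module): a non-interactive override of prompt_user_passwd(), answering
# challenges from a fixed table instead of the console.
class _ExampleBatchOpener(FancyURLopener):
    _credentials = {('www.example.com', 'secret files'): ('joe', 'hunter2')}

    def prompt_user_passwd(self, host, realm):
        # get_user_passwd() caches whatever we return under realm@host
        return self._credentials.get((host, realm), (None, None))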


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty email.message.Message object."""
    global _noheaders
    if _noheaders is None:
        _noheaders = email.message_from_string("")
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    raise urllib.error.URLError('ftp error', reason).with_traceback(sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise urllib.error.URLError('ftp error', reason) from reason
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (urllib.response.addclosehook(conn[0].makefile('rb'),
                                             self.endtransfer), conn[1])

    def endtransfer(self):
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
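
# Editorial sketch (hypothetical host and path): manual use of ftpwrapper,
# which open_ftp() normally drives through its connection cache.
def _example_ftpwrapper():
    wrapped = ftpwrapper('anonymous', 'user@example.com',
                         'ftp.example.com', 21, ['pub'])
    fp, length = wrapped.retrfile('notes.txt', 'I')   # binary retrieval
    try:
        return fp.read()        # length is the size, if the server sent one
    finally:
        fp.close()              # triggers endtransfer() via the close hook
        wrapped.close()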

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if name == 'no_proxy':
            # handled in proxy_bypass_environment
            continue
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies
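
# Editorial sketch (hypothetical values): given an environment such as
#     http_proxy=http://proxy.example.com:3128
#     FTP_PROXY=ftp://proxy.example.com:2121
# getproxies_environment() returns
#     {'http': 'http://proxy.example.com:3128',
#      'ftp': 'ftp://proxy.example.com:2121'}
# since variable names are lowercased before the '_proxy' suffix check.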

def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = urllib.parse.splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
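
# Editorial sketch (hypothetical helper and values): suffix matching
# against no_proxy, with and without a port on the host.
def _example_proxy_bypass_environment():
    os.environ['no_proxy'] = 'localhost,.internal.example.com'
    assert proxy_bypass_environment('localhost:8080')
    assert proxy_bypass_environment('www.internal.example.com')
    assert not proxy_bypass_environment('www.python.org')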


if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the Mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return proxies

    def proxy_bypass(host):
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0

    def getproxies():
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies
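
    # Editorial note (hypothetical registry values): ProxyServer holds
    # either one proxy for every protocol,
    #     'proxy.example.com:8080'
    # or a semicolon-separated per-protocol list,
    #     'http=127.0.0.1:3128;ftp=127.0.0.1:2121'
    # which the branches above normalize into scheme -> URL mappings.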

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = urllib.parse.splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print(proxyOverride)
        # now check if we match one of the registry values.
        for test in proxyOverride:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print("%s <--> %s" % (test, val))
                if re.match(test, val, re.I):
                    return 1
        return 0
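
    # Editorial sketch (hypothetical override entry): the glob-to-regex
    # rewriting above turns '*.example.com' into r'.*\.example\.com', so
    # re.match() bypasses the proxy for 'www.example.com' but not for
    # 'example.org'; '<local>' expands to the local host's names first.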

    def proxy_bypass(host):
        """Test if the host should bypass the proxy.

        Checks the environment's no_proxy setting first, if one is
        present, and otherwise the registry's ProxyOverride setting.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
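

# Editorial sketch (hypothetical target host): typical use of the
# platform-selected proxy helpers defined above.
def _example_proxy_lookup():
    host = 'www.python.org'
    if proxy_bypass(host):
        return None                     # connect directly
    return getproxies().get('http')     # proxy URL for HTTP, if any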