# Issues in merging urllib and urllib2:
# 1. They both define a function named urlopen()

"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the result as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as the original
urllib: pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.

objects of interest:
OpenerDirector --

Request -- An object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""

# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails, how should the error be signalled?  The client needs to
# know the HTTP error code.  But if the handler knows what the problem
# was, e.g., that it didn't know the hash algorithm requested in the
# challenge, it would be good to pass that information along to the
# client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener

import base64
import email
import hashlib
import http.client
import io
import os
import posixpath
import random
import re
import socket
import sys
import time
import urllib.parse, urllib.error, urllib.response
import bisect

from io import StringIO

# check for SSL
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

# used in User-Agent header sent
__version__ = sys.version[:3]

_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    global _opener
    if _opener is None:
        _opener = build_opener()
    return _opener.open(url, data, timeout)

def install_opener(opener):
    global _opener
    _opener = opener

# TODO(jhylton): Make this work with the same global opener.
_urlopener = None
def urlretrieve(url, filename=None, reporthook=None, data=None):
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

def urlcleanup():
    if _urlopener:
        _urlopener.cleanup()
    global _opener
    if _opener:
        _opener = None

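# Sketch of how install_opener() changes the module-level urlopen()
# (illustrative only; the proxy address is a placeholder):
#
#     opener = build_opener(ProxyHandler({'http': 'http://proxy:3128'}))
#     install_opener(opener)
#     # Subsequent urlopen() calls now go through the proxy-aware opener.
#     f = urlopen('http://www.example.com/')
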
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urllib.parse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()

class Request:

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = urllib.parse.unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        if self.type is None:
            self.type, self.__r_type = urllib.parse.splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        if self.host is None:
            self.host, self.__r_host = urllib.parse.splithost(self.__r_type)
            if self.host:
                self.host = urllib.parse.unquote(self.host)
        return self.host

    def get_selector(self):
        return self.__r_host

    def set_proxy(self, host, type):
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())

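# Sketch of constructing a Request directly (illustrative only; the
# URL and header values are placeholders):
#
#     req = Request('http://www.example.com/api',
#                   data=b'key=value',
#                   headers={'User-Agent': 'my-client/0.1'})
#     req.get_method()    # 'POST', because data is present
#     req.add_unredirected_header('Authorization', 'Basic ...')
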
class OpenerDirector:
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in a specific order, the order
            # is specified in a Handler attribute
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

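    # How add_handler() dispatches on method names (illustrative only;
    # MyHandler is hypothetical): a method named <protocol>_open goes
    # into handle_open, <protocol>_request/<protocol>_response go into
    # the processor tables, and <protocol>_error_<code> goes into
    # handle_error.
    #
    #     class MyHandler(BaseHandler):
    #         def http_open(self, req): ...        # handle_open['http']
    #         def http_request(self, req): ...     # process_request['http']
    #         def http_error_404(self, req, fp, code, msg, hdrs):
    #             ...                              # handle_error['http'][404]
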
    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http']  # https is not different from http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)

# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener

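# Sketch of replacing a default handler via build_opener (illustrative
# only; the DebugHTTPHandler subclass is hypothetical):
#
#     class DebugHTTPHandler(HTTPHandler):
#         def http_open(self, req):
#             print('opening', req.get_full_url())
#             return HTTPHandler.http_open(self, req)
#
#     # Because DebugHTTPHandler subclasses HTTPHandler, the default
#     # HTTPHandler is skipped and this one is used instead.
#     opener = build_opener(DebugHTTPHandler)
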
class BaseHandler:
    handler_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order


class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response

class HTTPDefaultErrorHandler(BaseHandler):
    def http_error_default(self, req, fp, code, msg, hdrs):
        raise urllib.error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

class HTTPRedirectHandler(BaseHandler):
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.get_full_url(),
                                         code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # Be lenient with URIs containing a space.
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return
        newurl = urllib.parse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise urllib.error.HTTPError(req.get_full_url(), code,
                                             self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"


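# Sketch of disabling redirects by overriding redirect_request
# (illustrative only):
#
#     class NoRedirectHandler(HTTPRedirectHandler):
#         def redirect_request(self, req, fp, code, msg, headers, newurl):
#             # Raising here means no other handler retries the redirect;
#             # the HTTPError still carries the 30x response.
#             raise urllib.error.HTTPError(req.get_full_url(), code, msg,
#                                          headers, fp)
#
#     opener = build_opener(NoRedirectHandler)
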
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = urllib.parse.splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by
        # sections 3.2 and 3.3), the path is empty or starts with '/'.
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = urllib.parse.splituser(authority)
    if userinfo is not None:
        user, password = urllib.parse.splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport

class ProxyHandler(BaseHandler):
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (urllib.parse.unquote(user),
                                   urllib.parse.unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = urllib.parse.unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)

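# Sketch of a ProxyHandler routing plain HTTP through a proxy
# (illustrative only; host and port are placeholders):
#
#     proxy_support = ProxyHandler({'http': 'http://user:pass@proxy:3128'})
#     opener = build_opener(proxy_support)
#     # proxy_open() parses the userinfo and adds a Proxy-authorization
#     # header before handing the request back to HTTPHandler.
#     f = opener.open('http://www.example.com/')
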
class HTTPPasswordMgr:

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if not realm in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urllib.parse.urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = urllib.parse.splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False


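# Sketch of how the password manager matches URIs (illustrative only;
# realm, host, and credentials are placeholders):
#
#     mgr = HTTPPasswordMgr()
#     mgr.add_password('PDQ Application', 'http://example.com/site/',
#                      'klem', 'pw')
#     # Lookups succeed for any URI at or below the stored path, via
#     # reduce_uri() + is_suburi():
#     mgr.find_user_password('PDQ Application',
#                            'http://example.com/site/page')
#     # -> ('klem', 'pw')
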
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):

    def find_user_password(self, realm, authuri):
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is not None:
            return user, password
        return HTTPPasswordMgr.find_user_password(self, None, authuri)


class AbstractBasicAuthHandler:

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None


class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        url = req.get_full_url()
        return self.http_error_auth_reqed('www-authenticate',
                                          url, req, headers)


class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component
        # in authority.  Assume there isn't one, since urllib2 does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        authority = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          authority, req, headers)


def randombytes(n):
    """Return n random bytes."""
    return os.urandom(n)

class AbstractDigestAuthHandler:
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX support for qop="auth-int" is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time.  Hm.  Unless the Password Manager is
            # prompting for the information.  Crap.  This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise urllib.error.HTTPError(req.get_full_url(), 401,
                                         "digest auth failed",
                                         headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise urllib.error.URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # unsupported algorithm; get_authorization() checks for None
            H = None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None


class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        host = urllib.parse.urlparse(req.get_full_url())[1]
        retry = self.http_error_auth_reqed('www-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry


class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        host = req.get_host()
        retry = self.http_error_auth_reqed('proxy-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry

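# Sketch of combining Digest and Basic auth handlers (illustrative
# only; realm, URI, and credentials are placeholders).  Digest is
# tried first because its handler_order (490) sorts before the
# BaseHandler default (500) used by Basic auth:
#
#     mgr = HTTPPasswordMgrWithDefaultRealm()
#     mgr.add_password(None, 'http://example.com/', 'klem', 'secret')
#     opener = build_opener(HTTPDigestAuthHandler(mgr),
#                           HTTPBasicAuthHandler(mgr))
#     f = opener.open('http://example.com/protected')
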
class AbstractHTTPHandler(BaseHandler):

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        host = request.get_host()
        if not host:
            raise urllib.error.URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        scheme, sel = urllib.parse.splittype(request.get_selector())
        sel_host, sel_path = urllib.parse.splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return the headers as an email.message.Message object
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise urllib.error.URLError('no host given')

        h = http_class(host, timeout=req.timeout)  # will parse host:port
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err:  # XXX what error?
            raise urllib.error.URLError(err)

        resp = urllib.response.addinfourl(r.fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp


class HTTPHandler(AbstractHTTPHandler):

    def http_open(self, req):
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_

if hasattr(http.client, 'HTTPSConnection'):
    class HTTPSHandler(AbstractHTTPHandler):

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_

class HTTPCookieProcessor(BaseHandler):
    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response

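# Sketch of cookie-aware opening (illustrative only; the URL is a
# placeholder).  The processor pair runs as http_request/http_response
# pre- and post-processors registered by add_handler():
#
#     import http.cookiejar
#     jar = http.cookiejar.CookieJar()
#     opener = build_opener(HTTPCookieProcessor(jar))
#     opener.open('http://www.example.com/')
#     for cookie in jar:
#         print(cookie.name, cookie.value)
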
class UnknownHandler(BaseHandler):
    def unknown_open(self, req):
        type = req.get_type()
        raise urllib.error.URLError('unknown url type: %s' % type)

def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated."""
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        if v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed

def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    res = []
    part = ''

    escape = quote = False
    for cur in s:
        if escape:
            part += cur
            escape = False
            continue
        if quote:
            if cur == '\\':
                escape = True
                continue
            elif cur == '"':
                quote = False
            part += cur
            continue

        if cur == ',':
            res.append(part)
            part = ''
            continue

        if cur == '"':
            quote = True

        part += cur

    # append last part
    if part:
        res.append(part)

    return [part.strip() for part in res]

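# Worked example of the two parsers above on a Digest challenge
# (illustrative only; the nonce value is made up):
#
#     challenge = 'realm="test@example.com", nonce="abc123", qop="auth"'
#     parse_http_list(challenge)
#     # -> ['realm="test@example.com"', 'nonce="abc123"', 'qop="auth"']
#     parse_keqv_list(parse_http_list(challenge))
#     # -> {'realm': 'test@example.com', 'nonce': 'abc123', 'qop': 'auth'}
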
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        if FileHandler.names is None:
            try:
                FileHandler.names = (socket.gethostbyname('localhost'),
                                     socket.gethostbyname(socket.gethostname()))
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        import email.utils
        import mimetypes
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(file)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = urllib.parse.splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                return urllib.response.addinfourl(open(localfile, 'rb'),
                                                  headers, 'file:'+file)
        except OSError as msg:
            # urllib2 users shouldn't expect OSErrors coming from urlopen()
            raise urllib.error.URLError(msg)
        raise urllib.error.URLError('file not on local host')

def _safe_gethostbyname(host):
    try:
        return socket.gethostbyname(host)
    except socket.gaierror:
        return None

class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise urllib.error.URLError('ftp error: no host given')
        host, port = urllib.parse.splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = urllib.parse.splituser(host)
        if user:
            user, passwd = urllib.parse.splitpasswd(user)
        else:
            passwd = None
        host = urllib.parse.unquote(host)
        user = urllib.parse.unquote(user or '')
        passwd = urllib.parse.unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise urllib.error.URLError(msg)
        path, attrs = urllib.parse.splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = list(map(urllib.parse.unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = urllib.parse.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return urllib.response.addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            exc = urllib.error.URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
        return fw

class CacheFTPHandler(FTPHandler):
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # guard against min() on an empty mapping when every
            # cached connection has expired
            if self.timeout:
                self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

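# Sketch of wiring in the FTP connection cache (illustrative only;
# timeout and connection numbers are arbitrary):
#
#     cache_ftp = CacheFTPHandler()
#     cache_ftp.setTimeout(30)    # keep idle connections 30 seconds
#     cache_ftp.setMaxConns(4)
#     opener = build_opener(cache_ftp)   # replaces the default FTPHandler
#     opener.open('ftp://ftp.example.com/pub/file.txt')
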
# Code moved from the old urllib module

MAXFTPCACHE = 10    # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return urllib.parse.unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return urllib.parse.quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = urllib.parse.unwrap(urllib.parse.toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return urllib.response.addinfourl(fp, headers, fullurl)
        urltype, url = urllib.parse.splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = urllib.parse.splittype(proxy)
            host, selector = urllib.parse.splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = urllib.parse.unwrap(urllib.parse.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.parse.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(urllib.parse.splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.parse.splittype(url)
            garbage, path = urllib.parse.splithost(path or "")
            path, garbage = urllib.parse.splitquery(path or "")
            path, garbage = urllib.parse.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        bs = 1024*8
        size = -1
        read = 0
        blocknum = 0
        if reporthook:
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.error.ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

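    # Sketch of a reporthook for retrieve() (illustrative only): it is
    # called once before the first block and once per block read, with
    # size == -1 when no Content-Length header was present.
    #
    #     def progress(blocknum, bs, size):
    #         if size > 0:
    #             print('%d%%' % min(100, blocknum * bs * 100 // size))
    #
    #     urlretrieve('http://www.example.com/big.bin', 'big.bin', progress)
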
    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = urllib.parse.splithost(url)
            if host:
                user_passwd, host = urllib.parse.splituser(host)
                host = urllib.parse.unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = urllib.parse.splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = urllib.parse.splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = urllib.parse.splithost(rest)
                if realhost:
                    user_passwd, realhost = urllib.parse.splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            # b64encode operates on bytes; credentials here are str
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode("ascii")
        else:
            proxy_auth = None

        if user_passwd:
            auth = base64.b64encode(user_passwd.encode()).decode("ascii")
        else:
            auth = None
        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise urllib.error.URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return urllib.response.addinfourl(response.fp, response.msg,
                                              "http:" + url,
                                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        void = fp.read()
        fp.close()
        raise urllib.error.HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise urllib.error.URLError(
                'file error: proxy support for file protocol '
                'currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, email.utils
        host, file = urllib.parse.splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise urllib.error.URLError(e)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        host, port = urllib.parse.splitport(host)
        if (not port
            and socket.gethostbyname(host) in (localhost(), thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        raise urllib.error.URLError('local file error: not on local host')

1679 def open_ftp(self, url):
1680 """Use FTP protocol."""
1681 if not isinstance(url, str):
1682 raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
1683 import mimetypes
1684 from io import StringIO
1685 host, path = urllib.parse.splithost(url)
1686 if not host: raise URLError('ftp error', 'no host given')
1687 host, port = urllib.parse.splitport(host)
1688 user, host = urllib.parse.splituser(host)
1689 if user: user, passwd = urllib.parse.splitpasswd(user)
1690 else: passwd = None
1691 host = urllib.parse.unquote(host)
1692 user = urllib.parse.unquote(user or '')
1693 passwd = urllib.parse.unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = urllib.parse.splitattr(path)
        path = urllib.parse.unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily; iterate over a copy of
            # the keys since entries are deleted during the loop
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = urllib.parse.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
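            # e.g. a URL ending in ';type=a' forces an ASCII ('A') transfer,
            # ';type=i' a binary ('image') one, ';type=d' a directory listing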
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return urllib.response.addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
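        # Example (illustrative): 'data:text/plain;base64,SGVsbG8=' has
        # mediatype 'text/plain', encoding 'base64', and payload b'Hello':
        #
        #     >>> import base64
        #     >>> base64.b64decode('SGVsbG8=')
        #     b'Hello'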
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                              time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # decode via bytes; latin-1 gives a 1:1 byte/character mapping
            data = base64.b64decode(data.encode('ascii')).decode('latin-1')
        else:
            data = urllib.parse.unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return urllib.response.addinfourl(f, headers, url)


class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return urllib.response.addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
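        # e.g. urllib.parse.urljoin('http://example.com/a/b', '../c')
        # yields 'http://example.com/c'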
        newurl = urllib.parse.urljoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if 'www-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
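        # e.g. a challenge 'Basic realm="WallyWorld"' matches as
        # scheme 'Basic', realm 'WallyWorld'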
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
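        # e.g. for an 'http' URL this dispatches to retry_http_basic_auth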
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return URLopener.http_error_default(self, url, fp,
                                                errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None


# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty email Message object."""
    global _noheaders
    if _noheaders is None:
        _noheaders = email.message_from_string("")
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    raise urllib.error.URLError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise urllib.error.URLError('ftp error', reason) from reason
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (urllib.response.addclosehook(conn[0].makefile('rb'),
                                             self.endtransfer), conn[1])

    def endtransfer(self):
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
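
# Example usage (a sketch; ftp.example.com is illustrative). open_ftp()
# keys cached wrappers by (user, host, port, dirs) and reuses the
# logged-in connection:
#
#     >>> w = ftpwrapper('anonymous', 'guest', 'ftp.example.com', 21, ['pub'])
#     >>> fp, retrlen = w.retrfile('README', 'I')   # binary RETR
#     >>> data = fp.read(); fp.close(); w.close()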

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if name == 'no_proxy':
            # handled in proxy_bypass_environment
            continue
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies
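
# Example (illustrative): with http_proxy set in the environment,
#
#     $ http_proxy=http://proxy.example.com:3128 python
#     >>> getproxies_environment()
#     {'http': 'http://proxy.example.com:3128'}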

def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = urllib.parse.splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
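
# Example (illustrative): with no_proxy='example.com,localhost',
# proxy_bypass_environment('www.example.com') returns 1 (bypass)
# and proxy_bypass_environment('python.org') returns 0.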


if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return proxies

    def proxy_bypass(host):
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0

    def getproxies():
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
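                    # e.g. (illustrative) 'http=proxy.example.com:3128;'
                    # 'ftp=ftpproxy.example.com:2121' maps each protocol
                    # to its own proxy address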
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = urllib.parse.splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Checks the environment first and, failing that, the registry's
        ProxyOverride setting.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment