# Issues in merging urllib and urllib2:
# 1. They both define a function named urlopen()

4"""An extensible library for opening URLs using a variety of protocols
5
6The simplest way to use this module is to call the urlopen function,
7which accepts a string containing a URL or a Request object (described
8below). It opens the URL and returns the results as file-like
9object; the returned object has some extra methods described below.
10
11The OpenerDirector manages a collection of Handler objects that do
12all the actual work. Each Handler implements a particular protocol or
13option. The OpenerDirector is a composite object that invokes the
14Handlers needed to open the requested URL. For example, the
15HTTPHandler performs HTTP GET and POST requests and deals with
16non-error returns. The HTTPRedirectHandler automatically deals with
17HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
18deals with digest authentication.
19
20urlopen(url, data=None) -- Basic usage is the same as original
21urllib. pass the url and optionally data to post to an HTTP URL, and
22get a file-like object back. One difference is that you can also pass
23a Request instance instead of URL. Raises a URLError (subclass of
24IOError); for HTTP errors, raises an HTTPError, which can also be
25treated as a valid response.
26
27build_opener -- Function that creates a new OpenerDirector instance.
28Will install the default handlers. Accepts one or more Handlers as
29arguments, either instances or Handler classes that it will
30instantiate. If one of the argument is a subclass of the default
31handler, the argument will be installed instead of the default.
32
33install_opener -- Installs a new opener as the default opener.
34
objects of interest:
OpenerDirector -- Manages a collection of Handlers and opens URLs by
invoking their methods in order.

Request -- An object that encapsulates the state of a request. The
state can be as simple as the URL. It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler -- Parent class of all concrete Handlers; subclasses
implement the per-protocol and per-condition methods.

internals:
BaseHandler and parent
_call_chain conventions
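(a Handler method named <protocol>_open, <protocol>_request,
<protocol>_response, or <protocol>_error_<code> is registered
automatically for that protocol and condition)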

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""

# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails, how should the error be signalled? The client needs to
# know the HTTP error code. But if the handler knows the problem was,
# e.g., that it didn't support the hash algorithm requested in the
# challenge, it would be good to pass that information along to the
# client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
import base64
import email
import hashlib
import http.client
import io
import os
import posixpath
import random
import re
import socket
import sys
import time
import urllib.parse, urllib.error, urllib.response
import bisect

from io import StringIO

# check for SSL
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True

# used in User-Agent header sent
__version__ = sys.version[:3]

_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    global _opener
    if _opener is None:
        _opener = build_opener()
    return _opener.open(url, data, timeout)

def install_opener(opener):
    global _opener
    _opener = opener

# TODO(jhylton): Make this work with the same global opener.
_urlopener = None
def urlretrieve(url, filename=None, reporthook=None, data=None):
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)

def urlcleanup():
    if _urlopener:
        _urlopener.cleanup()
    global _opener
    if _opener:
        _opener = None

# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urllib.parse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()

class Request:

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = urllib.parse.unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        if self.type is None:
            self.type, self.__r_type = urllib.parse.splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        if self.host is None:
            self.host, self.__r_host = urllib.parse.splithost(self.__r_type)
            if self.host:
                self.host = urllib.parse.unquote(self.host)
        return self.host

    def get_selector(self):
        return self.__r_host

    def set_proxy(self, host, type):
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
257
258class OpenerDirector:
259 def __init__(self):
260 client_version = "Python-urllib/%s" % __version__
261 self.addheaders = [('User-agent', client_version)]
262 # manage the individual handlers
263 self.handlers = []
264 self.handle_open = {}
265 self.handle_error = {}
266 self.process_response = {}
267 self.process_request = {}
268
269 def add_handler(self, handler):
270 if not hasattr(handler, "add_parent"):
271 raise TypeError("expected BaseHandler instance, got %r" %
272 type(handler))
273
274 added = False
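        # Handler methods follow a naming convention:
        #   <protocol>_open / <protocol>_request / <protocol>_response /
        #   <protocol>_error_<code>; each matching method name registers
        #   this handler in the corresponding lookup table below.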
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in a specific order, the order
            # is specified in a Handler attribute
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)

# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
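
    A usage sketch (the proxy URL here is illustrative):

        opener = build_opener(ProxyHandler({'http': 'http://proxy:3128'}))
        opener.open('http://www.python.org/')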
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener

class BaseHandler:
    handler_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order


class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response

class HTTPDefaultErrorHandler(BaseHandler):
    def http_error_default(self, req, fp, code, msg, hdrs):
        raise urllib.error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

class HTTPRedirectHandler(BaseHandler):
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.get_full_url(),
                                         code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be lenient with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return
        newurl = urllib.parse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise urllib.error.HTTPError(req.get_full_url(), code,
                                             self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"


def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = urllib.parse.splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2
        # and 3.3), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = urllib.parse.splituser(authority)
    if userinfo is not None:
        user, password = urllib.parse.splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport

class ProxyHandler(BaseHandler):
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (urllib.parse.unquote(user),
                                   urllib.parse.unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = urllib.parse.unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)

class HTTPPasswordMgr:

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if not realm in self.passwd:
            self.passwd[realm] = {}
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)
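
    # Typical use (realm and credentials illustrative):
    #   mgr = HTTPPasswordMgr()
    #   mgr.add_password('realm', 'http://example.com/', 'joe', 'secret')
    #   mgr.find_user_password('realm', 'http://example.com/spam')
    #     -> ('joe', 'secret')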

    def find_user_password(self, realm, authuri):
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urllib.parse.urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = urllib.parse.splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
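        # note: posixpath.commonprefix compares character by character,
        # so a base path of '/foo' also matches '/foobar'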
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False


class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):

    def find_user_password(self, realm, authuri):
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is not None:
            return user, password
        return HTTPPasswordMgr.find_user_password(self, None, authuri)


class AbstractBasicAuthHandler:

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)
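
    # e.g. for the header value 'Basic realm="example"', group(1) is
    # 'Basic' and group(3) is 'example'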

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None


class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        url = req.get_full_url()
        return self.http_error_auth_reqed('www-authenticate',
                                          url, req, headers)


class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib2 does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        authority = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          authority, req, headers)


def randombytes(n):
    """Return n random bytes."""
    return os.urandom(n)

class AbstractDigestAuthHandler:
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" support is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time.  Hm.  Unless the Password Manager is
            # prompting for the information.  Crap.  This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise urllib.error.HTTPError(req.get_full_url(), 401,
                                         "digest auth failed",
                                         headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

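        # RFC 2617: A1 = "user:realm:password", A2 = "method:uri"; the
        # response digest is KD(H(A1), "nonce:[nc:cnonce:qop:]H(A2)")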
        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise urllib.error.URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            H = None    # unsupported algorithm; callers check for None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None


class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        host = urllib.parse.urlparse(req.get_full_url())[1]
        retry = self.http_error_auth_reqed('www-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry


class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        host = req.get_host()
        retry = self.http_error_auth_reqed('proxy-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry
989
990class AbstractHTTPHandler(BaseHandler):
991
992 def __init__(self, debuglevel=0):
993 self._debuglevel = debuglevel
994
995 def set_http_debuglevel(self, level):
996 self._debuglevel = level
997
998 def do_request_(self, request):
999 host = request.get_host()
1000 if not host:
1001 raise urllib.error.URLError('no host given')
1002
1003 if request.has_data(): # POST
1004 data = request.get_data()
1005 if not request.has_header('Content-type'):
1006 request.add_unredirected_header(
1007 'Content-type',
1008 'application/x-www-form-urlencoded')
1009 if not request.has_header('Content-length'):
1010 request.add_unredirected_header(
1011 'Content-length', '%d' % len(data))
1012
1013 scheme, sel = urllib.parse.splittype(request.get_selector())
1014 sel_host, sel_path = urllib.parse.splithost(sel)
1015 if not request.has_header('Host'):
1016 request.add_unredirected_header('Host', sel_host or host)
1017 for name, value in self.parent.addheaders:
1018 name = name.capitalize()
1019 if not request.has_header(name):
1020 request.add_unredirected_header(name, value)
1021
1022 return request
1023
1024 def do_open(self, http_class, req):
1025 """Return an addinfourl object for the request, using http_class.
1026
1027 http_class must implement the HTTPConnection API from http.client.
1028 The addinfourl return value is a file-like object. It also
1029 has methods and attributes including:
1030 - info(): return a mimetools.Message object for the headers
1031 - geturl(): return the original request URL
1032 - code: HTTP status code
1033 """
1034 host = req.get_host()
1035 if not host:
1036 raise urllib.error.URLError('no host given')
1037
1038 h = http_class(host, timeout=req.timeout) # will parse host:port
1039 headers = dict(req.headers)
1040 headers.update(req.unredirected_hdrs)
1041
1042 # TODO(jhylton): Should this be redesigned to handle
1043 # persistent connections?
1044
1045 # We want to make an HTTP/1.1 request, but the addinfourl
1046 # class isn't prepared to deal with a persistent connection.
1047 # It will try to read all remaining data from the socket,
1048 # which will block while the server waits for the next request.
1049 # So make sure the connection gets closed after the (only)
1050 # request.
1051 headers["Connection"] = "close"
1052 headers = dict(
1053 (name.title(), val) for name, val in headers.items())
1054 try:
1055 h.request(req.get_method(), req.get_selector(), req.data, headers)
1056 r = h.getresponse()
1057 except socket.error as err: # XXX what error?
1058 raise urllib.error.URLError(err)
1059
1060 resp = urllib.response.addinfourl(r.fp, r.msg, req.get_full_url())
1061 resp.code = r.status
1062 resp.msg = r.reason
1063 return resp
1064

class HTTPHandler(AbstractHTTPHandler):

    def http_open(self, req):
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_

if hasattr(http.client, 'HTTPSConnection'):
    class HTTPSHandler(AbstractHTTPHandler):

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_

class HTTPCookieProcessor(BaseHandler):
    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response

class UnknownHandler(BaseHandler):
    def unknown_open(self, req):
        type = req.get_type()
        raise urllib.error.URLError('unknown url type: %s' % type)

def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated."""
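    # e.g. parse_keqv_list(['foo=bar', 'baz="2"']) -> {'foo': 'bar', 'baz': '2'}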
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        if v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed

def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
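
    For example, parse_http_list('a, "b, c", d') returns
    ['a', '"b, c"', 'd'].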
    """
    res = []
    part = ''

    escape = quote = False
    for cur in s:
        if escape:
            part += cur
            escape = False
            continue
        if quote:
            if cur == '\\':
                escape = True
                continue
            elif cur == '"':
                quote = False
            part += cur
            continue

        if cur == ',':
            res.append(part)
            part = ''
            continue

        if cur == '"':
            quote = True

        part += cur

    # append last part
    if part:
        res.append(part)

    return [part.strip() for part in res]

class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        if FileHandler.names is None:
            try:
                FileHandler.names = (socket.gethostbyname('localhost'),
                                     socket.gethostbyname(socket.gethostname()))
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        import email.utils
        import mimetypes
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(file)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = urllib.parse.splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                return urllib.response.addinfourl(open(localfile, 'rb'),
                                                  headers, 'file:'+file)
        except OSError as msg:
            # urllib2 users shouldn't expect OSErrors coming from urlopen()
            raise urllib.error.URLError(msg)
        raise urllib.error.URLError('file not on local host')

def _safe_gethostbyname(host):
    try:
        return socket.gethostbyname(host)
    except socket.gaierror:
        return None

class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise urllib.error.URLError('ftp error: no host given')
        host, port = urllib.parse.splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = urllib.parse.splituser(host)
        if user:
            user, passwd = urllib.parse.splitpasswd(user)
        else:
            passwd = None
        host = urllib.parse.unquote(host)
        user = urllib.parse.unquote(user or '')
        passwd = urllib.parse.unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise urllib.error.URLError(msg)
        path, attrs = urllib.parse.splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = list(map(urllib.parse.unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = urllib.parse.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return urllib.response.addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            exc = urllib.error.URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
        return fw

class CacheFTPHandler(FTPHandler):
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return urllib.parse.unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return urllib.parse.quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = urllib.parse.unwrap(urllib.parse.toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return urllib.response.addinfourl(fp, headers, fullurl)
        urltype, url = urllib.parse.splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = urllib.parse.splittype(proxy)
            host, selector = urllib.parse.splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = urllib.parse.splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = urllib.parse.unwrap(urllib.parse.toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = urllib.parse.splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(urllib.parse.splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            garbage, path = urllib.parse.splittype(url)
            garbage, path = urllib.parse.splithost(path or "")
            path, garbage = urllib.parse.splitquery(path or "")
            path, garbage = urllib.parse.splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        bs = 1024*8
        size = -1
        read = 0
        blocknum = 0
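        # reporthook is called as reporthook(block number, block size,
        # total size); total size stays -1 when Content-Length is unknown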
        if reporthook:
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.error.ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the URL to retrieve or a (host, relative-path) pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = urllib.parse.splithost(url)
            if host:
                user_passwd, host = urllib.parse.splituser(host)
                host = urllib.parse.unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = urllib.parse.splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = urllib.parse.splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = urllib.parse.splithost(rest)
                if realhost:
                    user_passwd, realhost = urllib.parse.splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise urllib.error.URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return urllib.response.addinfourl(response.fp, response.msg,
                                              "http:" + url,
                                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        void = fp.read()
        fp.close()
        raise urllib.error.HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise urllib.error.URLError('file error',
                'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, email.utils
        from io import StringIO
        host, file = urllib.parse.splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise urllib.error.URLError(e)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        host, port = urllib.parse.splitport(host)
        if (not port
            and socket.gethostbyname(host) in (localhost(), thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return urllib.response.addinfourl(open(localname, 'rb'),
                                              headers, urlfile)
        raise urllib.error.URLError('local file error', 'not on local host')
1681
    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error',
                           'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = urllib.parse.splithost(url)
        if not host: raise URLError('ftp error', 'no host given')
        host, port = urllib.parse.splitport(host)
        user, host = urllib.parse.splituser(host)
        if user: user, passwd = urllib.parse.splitpasswd(user)
        else: passwd = None
        host = urllib.parse.unquote(host)
        user = urllib.parse.unquote(user or '')
        passwd = urllib.parse.unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = urllib.parse.splitattr(path)
        path = urllib.parse.unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily; iterate over a copy of
            # the keys since entries are deleted during iteration
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = urllib.parse.splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return urllib.response.addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

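    # Illustrative sketch (comments only): the transfer type defaults to
    # image (binary) for files and directory listing for bare directories,
    # and can be forced with the RFC 1738 ";type=" attribute:
    #
    #   opener.open_ftp('//anonymous@ftp.example.com/pub/notes.txt;type=a')
    #   # -> ASCII retrieval ('TYPE A' then 'RETR notes.txt')
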
    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error',
                           'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                              time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # decode to bytes, then back to str so len() and StringIO work
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = urllib.parse.unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return urllib.response.addinfourl(f, headers, url)

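    # Illustrative sketch (comments only): data URLs carry their payload
    # inline, so no network traffic results:
    #
    #   f = opener.open_data('data:text/plain;base64,SGVsbG8sIHdvcmxkIQ==')
    #   f.info()['Content-type']    # 'text/plain'
    #   # the returned file yields the synthesized headers plus the decoded
    #   # body 'Hello, world!'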

class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return urllib.response.addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

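    # Illustrative sketch (comments only): a relative Location header is
    # resolved against the original request URL before reopening; assuming
    # basejoin behaves like urllib.parse.urljoin,
    #
    #   basejoin('http://www.example.com/a/b', '../c')
    #   # -> 'http://www.example.com/c'
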
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        else:
            return getattr(self, name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''),
                                  proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = urllib.parse.splittype(proxy)
        proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                                  urllib.parse.quote(passwd, safe=''),
                                  proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = urllib.parse.splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
                             urllib.parse.quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

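    # Illustrative sketch (comments only): a 401 retry embeds the quoted
    # credentials in the netloc and reopens the URL, so a request for
    #
    #   http://www.example.com/private/
    #
    # is retried (given user 'klem' and password 'p w' -- both assumptions) as
    #
    #   http://klem:p%20w@www.example.com/private/
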
    def get_user_passwd(self, host, realm, clear_cache=0):
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                     (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None

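# A minimal sketch (an assumption, not part of this module) of supplying
# fixed credentials instead of prompting on the terminal:
#
#   class AutoAuthOpener(FancyURLopener):
#       def __init__(self, user, passwd, *args, **kwargs):
#           FancyURLopener.__init__(self, *args, **kwargs)
#           self._creds = (user, passwd)
#       def prompt_user_passwd(self, host, realm):
#           return self._creds    # same (user, passwd) for every realm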

# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty email.message.Message object."""
    global _noheaders
    if _noheaders is None:
        _noheaders = email.message_from_string("")
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                if str(reason)[:3] != '550':
                    raise urllib.error.URLError('ftp error', reason).with_traceback(sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise urllib.error.URLError('ftp error', reason) from reason
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (urllib.response.addclosehook(conn[0].makefile('rb'),
                                             self.endtransfer), conn[1])

    def endtransfer(self):
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass

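# A minimal sketch (assuming a reachable anonymous FTP server) of using
# ftpwrapper directly, mirroring what open_ftp() does through its cache:
#
#   wrapped = ftpwrapper('anonymous', 'user@', 'ftp.example.com', 21,
#                        ['pub'])                 # login, then cwd into 'pub'
#   fp, length = wrapped.retrfile('README', 'I')  # binary RETR
#   data = fp.read()
#   fp.close()                                    # runs the endtransfer hook
#   wrapped.close()
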
# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if name == 'no_proxy':
            # handled in proxy_bypass_environment
            continue
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies

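# Illustrative sketch (comments only): with the environment set up as, say,
#
#   http_proxy=http://proxy.example.com:3128
#   ftp_proxy=http://proxy.example.com:3128
#
# getproxies_environment() returns
#
#   {'http': 'http://proxy.example.com:3128',
#    'ftp': 'http://proxy.example.com:3128'}
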
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = urllib.parse.splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0

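# Illustrative sketch (comments only): with no_proxy=".example.com,localhost"
#
#   proxy_bypass_environment('www.example.com')    # -> 1 (suffix match)
#   proxy_bypass_environment('localhost:8080')     # -> 1 (port is stripped)
#   proxy_bypass_environment('www.python.org')     # -> 0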

if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return proxies

    def proxy_bypass(host):
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0

    def getproxies():
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

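    # Illustrative sketch (comments only): the two ProxyServer formats parsed
    # above are
    #
    #   "http=proxy1:80;ftp=proxy2:21"  -> {'http': 'http://proxy1:80',
    #                                       'ftp': 'ftp://proxy2:21'}
    #   "proxyhost:3128"                -> {'http': 'http://proxyhost:3128',
    #                                       'ftp': 'ftp://proxyhost:3128'}
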
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = urllib.parse.splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # now check if we match one of the registry values.
        for test in proxyOverride:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

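    # Illustrative sketch (comments only): ProxyOverride entries are globs,
    # translated to regexes before matching; with ProxyOverride set to
    # "*.internal.example.com;<local>" (an assumption),
    #
    #   proxy_bypass_registry('www.internal.example.com')  # -> 1
    #   proxy_bypass_registry('www.python.org')            # -> 0
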
    def proxy_bypass(host):
        """Test if the host should bypass the proxy.

        Checks the environment's no_proxy setting, if specified,
        otherwise the registry's ProxyOverride list.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment