Blame - Lib/urllib/request.py - platform/external/python/cpython3

blob: 63ce6d43799ee73c8f94512a31cfacc7f04716b4 [file] [log] [blame]

Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1	# Issues in merging urllib and urllib2:
				2	# 1. They both define a function named urlopen()
				3
				4	"""An extensible library for opening URLs using a variety of protocols
				5
				6	The simplest way to use this module is to call the urlopen function,
				7	which accepts a string containing a URL or a Request object (described
				8	below). It opens the URL and returns the results as file-like
				9	object; the returned object has some extra methods described below.
				10
				11	The OpenerDirector manages a collection of Handler objects that do
				12	all the actual work. Each Handler implements a particular protocol or
				13	option. The OpenerDirector is a composite object that invokes the
				14	Handlers needed to open the requested URL. For example, the
				15	HTTPHandler performs HTTP GET and POST requests and deals with
				16	non-error returns. The HTTPRedirectHandler automatically deals with
				17	HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
				18	deals with digest authentication.
				19
				20	urlopen(url, data=None) -- Basic usage is the same as original
				21	urllib. Pass the URL and optionally data to post to an HTTP URL, and
				22	get a file-like object back. One difference is that you can also pass
				23	a Request instance instead of URL. Raises a URLError (subclass of
				24	IOError); for HTTP errors, raises an HTTPError, which can also be
				25	treated as a valid response.
				26
				27	build_opener -- Function that creates a new OpenerDirector instance.
				28	Will install the default handlers. Accepts one or more Handlers as
				29	arguments, either instances or Handler classes that it will
				30	instantiate. If one of the arguments is a subclass of the default
				31	handler, the argument will be installed instead of the default.
				32
				33	install_opener -- Installs a new opener as the default opener.
				34
				35	objects of interest:
				36	OpenerDirector --
				37
				38	Request -- An object that encapsulates the state of a request. The
				39	state can be as simple as the URL. It can also include extra HTTP
				40	headers, e.g. a User-Agent.
				41
				42	BaseHandler --
				43
				44	internals:
				45	BaseHandler and parent
				46	_call_chain conventions
				47
				48	Example usage:
				49
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	50	import urllib.request
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	51
				52	# set up authentication info
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	53	authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	54	authinfo.add_password(realm='PDQ Application',
				55	uri='https://mahler:8092/site-updates.py',
				56	user='klem',
				57	passwd='geheim$parole')
				58
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	59	proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	60
				61	# build a new opener that adds authentication and caching FTP handlers
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	62	opener = urllib.request.build_opener(proxy_support, authinfo,
				63	urllib.request.CacheFTPHandler)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	64
				65	# install it
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	66	urllib.request.install_opener(opener)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	67
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	68	f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	69	"""
				70
				71	# XXX issues:
				72	# If an authentication error handler that tries to perform
				73	# authentication for some reason but fails, how should the error be
				74	# signalled? The client needs to know the HTTP error code. But if
				75	# the handler knows that the problem was, e.g., that it didn't know
				76	# the hash algo that was requested in the challenge, it would be good to
				77	# pass that information along to the client, too.
				78	# ftp errors aren't handled cleanly
				79	# check digest against correct (i.e. non-apache) implementation
				80
				81	# Possible extensions:
				82	# complex proxies XXX not sure what exactly was meant by this
				83	# abstract factory for opener
				84
				85	import base64
				86	import email
				87	import hashlib
				88	import http.client
				89	import io
				90	import os
				91	import posixpath
				92	import random
				93	import re
				94	import socket
				95	import sys
				96	import time
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	97	import bisect
				98
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	99	from urllib.error import URLError, HTTPError, ContentTooShortError
				100	from urllib.parse import (
				101	urlparse, urlsplit, urljoin, unwrap, quote, unquote,
				102	splittype, splithost, splitport, splituser, splitpasswd,
Facundo Batista	f24802c	2008-08-17 03:36:03 +0000	[diff] [blame]	103	splitattr, splitquery, splitvalue, to_bytes, urlunparse)
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	104	from urllib.response import addinfourl, addclosehook
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	105
				106	# check for SSL
				107	try:
				108	import ssl
				109	except:
				110	_have_ssl = False
				111	else:
				112	_have_ssl = True
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	113
				114	# used in User-Agent header sent
				115	__version__ = sys.version[:3]
				116
				117	_opener = None
				118	def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
				119	global _opener
				120	if _opener is None:
				121	_opener = build_opener()
				122	return _opener.open(url, data, timeout)
				123
				124	def install_opener(opener):
				125	global _opener
				126	_opener = opener
				127
				128	# TODO(jhylton): Make this work with the same global opener.
				129	_urlopener = None
				130	def urlretrieve(url, filename=None, reporthook=None, data=None):
				131	global _urlopener
				132	if not _urlopener:
				133	_urlopener = FancyURLopener()
				134	return _urlopener.retrieve(url, filename, reporthook, data)
				135
				136	def urlcleanup():
				137	if _urlopener:
				138	_urlopener.cleanup()
				139	global _opener
				140	if _opener:
				141	_opener = None
				142
				143	# copied from cookielib.py
Antoine Pitrou	fd03645	2008-08-19 17:56:33 +0000	[diff] [blame]	144	_cut_port_re = re.compile(r":\d+$", re.ASCII)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	145	def request_host(request):
				146	"""Return request-host, as defined by RFC 2965.
				147
				148	Variation from RFC: returned value is lowercased, for convenient
				149	comparison.
				150
				151	"""
				152	url = request.get_full_url()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	153	host = urlparse(url)[1]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	154	if host == "":
				155	host = request.get_header("Host", "")
				156
				157	# remove port, if present
				158	host = _cut_port_re.sub("", host, 1)
				159	return host.lower()
				160
				161	class Request:
				162
				163	def __init__(self, url, data=None, headers={},
				164	origin_req_host=None, unverifiable=False):
				165	# unwrap('<URL:type://host/path>') --> 'type://host/path'
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	166	self.__original = unwrap(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	167	self.type = None
				168	# self.__r_type is what's left after doing the splittype
				169	self.host = None
				170	self.port = None
				171	self.data = data
				172	self.headers = {}
				173	for key, value in headers.items():
				174	self.add_header(key, value)
				175	self.unredirected_hdrs = {}
				176	if origin_req_host is None:
				177	origin_req_host = request_host(self)
				178	self.origin_req_host = origin_req_host
				179	self.unverifiable = unverifiable
				180
				181	def __getattr__(self, attr):
				182	# XXX this is a fallback mechanism to guard against these
				183	# methods getting called in a non-standard order. this may be
				184	# too complicated and/or unnecessary.
				185	# XXX should the __r_XXX attributes be public?
				186	if attr[:12] == '_Request__r_':
				187	name = attr[12:]
				188	if hasattr(Request, 'get_' + name):
				189	getattr(self, 'get_' + name)()
				190	return getattr(self, attr)
				191	raise AttributeError(attr)
				192
				193	def get_method(self):
				194	if self.has_data():
				195	return "POST"
				196	else:
				197	return "GET"
				198
				199	# XXX these helper methods are lame
				200
				201	def add_data(self, data):
				202	self.data = data
				203
				204	def has_data(self):
				205	return self.data is not None
				206
				207	def get_data(self):
				208	return self.data
				209
				210	def get_full_url(self):
				211	return self.__original
				212
				213	def get_type(self):
				214	if self.type is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	215	self.type, self.__r_type = splittype(self.__original)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	216	if self.type is None:
				217	raise ValueError("unknown url type: %s" % self.__original)
				218	return self.type
				219
				220	def get_host(self):
				221	if self.host is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	222	self.host, self.__r_host = splithost(self.__r_type)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	223	if self.host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	224	self.host = unquote(self.host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	225	return self.host
				226
				227	def get_selector(self):
				228	return self.__r_host
				229
				230	def set_proxy(self, host, type):
				231	self.host, self.type = host, type
				232	self.__r_host = self.__original
				233
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame]	234	def has_proxy(self):
				235	return self.__r_host == self.__original
				236
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	237	def get_origin_req_host(self):
				238	return self.origin_req_host
				239
				240	def is_unverifiable(self):
				241	return self.unverifiable
				242
				243	def add_header(self, key, val):
				244	# useful for something like authentication
				245	self.headers[key.capitalize()] = val
				246
				247	def add_unredirected_header(self, key, val):
				248	# will not be added to a redirected request
				249	self.unredirected_hdrs[key.capitalize()] = val
				250
				251	def has_header(self, header_name):
				252	return (header_name in self.headers or
				253	header_name in self.unredirected_hdrs)
				254
				255	def get_header(self, header_name, default=None):
				256	return self.headers.get(
				257	header_name,
				258	self.unredirected_hdrs.get(header_name, default))
				259
				260	def header_items(self):
				261	hdrs = self.unredirected_hdrs.copy()
				262	hdrs.update(self.headers)
				263	return list(hdrs.items())
				264
				265	class OpenerDirector:
				266	def __init__(self):
				267	client_version = "Python-urllib/%s" % __version__
				268	self.addheaders = [('User-agent', client_version)]
				269	# manage the individual handlers
				270	self.handlers = []
				271	self.handle_open = {}
				272	self.handle_error = {}
				273	self.process_response = {}
				274	self.process_request = {}
				275
				276	def add_handler(self, handler):
				277	if not hasattr(handler, "add_parent"):
				278	raise TypeError("expected BaseHandler instance, got %r" %
				279	type(handler))
				280
				281	added = False
				282	for meth in dir(handler):
				283	if meth in ["redirect_request", "do_open", "proxy_open"]:
				284	# oops, coincidental match
				285	continue
				286
				287	i = meth.find("_")
				288	protocol = meth[:i]
				289	condition = meth[i+1:]
				290
				291	if condition.startswith("error"):
				292	j = condition.find("_") + i + 1
				293	kind = meth[j+1:]
				294	try:
				295	kind = int(kind)
				296	except ValueError:
				297	pass
				298	lookup = self.handle_error.get(protocol, {})
				299	self.handle_error[protocol] = lookup
				300	elif condition == "open":
				301	kind = protocol
				302	lookup = self.handle_open
				303	elif condition == "response":
				304	kind = protocol
				305	lookup = self.process_response
				306	elif condition == "request":
				307	kind = protocol
				308	lookup = self.process_request
				309	else:
				310	continue
				311
				312	handlers = lookup.setdefault(kind, [])
				313	if handlers:
				314	bisect.insort(handlers, handler)
				315	else:
				316	handlers.append(handler)
				317	added = True
				318
				319	if added:
				320	# the handlers must work in an specific order, the order
				321	# is specified in a Handler attribute
				322	bisect.insort(self.handlers, handler)
				323	handler.add_parent(self)
				324
				325	def close(self):
				326	# Only exists for backwards compatibility.
				327	pass
				328
				329	def _call_chain(self, chain, kind, meth_name, *args):
				330	# Handlers raise an exception if no one else should try to handle
				331	# the request, or return None if they can't but another handler
				332	# could. Otherwise, they return the response.
				333	handlers = chain.get(kind, ())
				334	for handler in handlers:
				335	func = getattr(handler, meth_name)
				336
				337	result = func(*args)
				338	if result is not None:
				339	return result
				340
				341	def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
				342	# accept a URL or a Request object
				343	if isinstance(fullurl, str):
				344	req = Request(fullurl, data)
				345	else:
				346	req = fullurl
				347	if data is not None:
				348	req.add_data(data)
				349
				350	req.timeout = timeout
				351	protocol = req.get_type()
				352
				353	# pre-process request
				354	meth_name = protocol+"_request"
				355	for processor in self.process_request.get(protocol, []):
				356	meth = getattr(processor, meth_name)
				357	req = meth(req)
				358
				359	response = self._open(req, data)
				360
				361	# post-process response
				362	meth_name = protocol+"_response"
				363	for processor in self.process_response.get(protocol, []):
				364	meth = getattr(processor, meth_name)
				365	response = meth(req, response)
				366
				367	return response
				368
				369	def _open(self, req, data=None):
				370	result = self._call_chain(self.handle_open, 'default',
				371	'default_open', req)
				372	if result:
				373	return result
				374
				375	protocol = req.get_type()
				376	result = self._call_chain(self.handle_open, protocol, protocol +
				377	'_open', req)
				378	if result:
				379	return result
				380
				381	return self._call_chain(self.handle_open, 'unknown',
				382	'unknown_open', req)
				383
				384	def error(self, proto, *args):
				385	if proto in ('http', 'https'):
				386	# XXX http[s] protocols are special-cased
				387	dict = self.handle_error['http'] # https is not different than http
				388	proto = args[2] # YUCK!
				389	meth_name = 'http_error_%s' % proto
				390	http_err = 1
				391	orig_args = args
				392	else:
				393	dict = self.handle_error
				394	meth_name = proto + '_error'
				395	http_err = 0
				396	args = (dict, proto, meth_name) + args
				397	result = self._call_chain(*args)
				398	if result:
				399	return result
				400
				401	if http_err:
				402	args = (dict, 'default', 'http_error_default') + orig_args
				403	return self._call_chain(*args)
				404
				405	# XXX probably also want an abstract factory that knows when it makes
				406	# sense to skip a superclass in favor of a subclass and when it might
				407	# make sense to include both
				408
				409	def build_opener(*handlers):
				410	"""Create an opener object from a list of handlers.
				411
				412	The opener will use several default handlers, including support
				413	for HTTP and FTP.
				414
				415	If any of the handlers passed as arguments are subclasses of the
				416	default handlers, the default handlers will not be used.
				417	"""
				418	def isclass(obj):
				419	return isinstance(obj, type) or hasattr(obj, "__bases__")
				420
				421	opener = OpenerDirector()
				422	default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
				423	HTTPDefaultErrorHandler, HTTPRedirectHandler,
				424	FTPHandler, FileHandler, HTTPErrorProcessor]
				425	if hasattr(http.client, "HTTPSConnection"):
				426	default_classes.append(HTTPSHandler)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	427	skip = set()
				428	for klass in default_classes:
				429	for check in handlers:
				430	if isclass(check):
				431	if issubclass(check, klass):
				432	skip.add(klass)
				433	elif isinstance(check, klass):
				434	skip.add(klass)
				435	for klass in skip:
				436	default_classes.remove(klass)
				437
				438	for klass in default_classes:
				439	opener.add_handler(klass())
				440
				441	for h in handlers:
				442	if isclass(h):
				443	h = h()
				444	opener.add_handler(h)
				445	return opener
				446
				447	class BaseHandler:
				448	handler_order = 500
				449
				450	def add_parent(self, parent):
				451	self.parent = parent
				452
				453	def close(self):
				454	# Only exists for backwards compatibility
				455	pass
				456
				457	def __lt__(self, other):
				458	if not hasattr(other, "handler_order"):
				459	# Try to preserve the old behavior of having custom classes
				460	# inserted after default ones (works only for custom user
				461	# classes which are not aware of handler_order).
				462	return True
				463	return self.handler_order < other.handler_order
				464
				465
				466	class HTTPErrorProcessor(BaseHandler):
				467	"""Process HTTP error responses."""
				468	handler_order = 1000 # after all other processing
				469
				470	def http_response(self, request, response):
				471	code, msg, hdrs = response.code, response.msg, response.info()
				472
				473	# According to RFC 2616, "2xx" code indicates that the client's
				474	# request was successfully received, understood, and accepted.
				475	if not (200 <= code < 300):
				476	response = self.parent.error(
				477	'http', request, response, code, msg, hdrs)
				478
				479	return response
				480
				481	https_response = http_response
				482
				483	class HTTPDefaultErrorHandler(BaseHandler):
				484	def http_error_default(self, req, fp, code, msg, hdrs):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	485	raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	486
				487	class HTTPRedirectHandler(BaseHandler):
				488	# maximum number of redirections to any single URL
				489	# this is needed because of the state that cookies introduce
				490	max_repeats = 4
				491	# maximum total number of redirections (regardless of URL) before
				492	# assuming we're in a loop
				493	max_redirections = 10
				494
				495	def redirect_request(self, req, fp, code, msg, headers, newurl):
				496	"""Return a Request or None in response to a redirect.
				497
				498	This is called by the http_error_30x methods when a
				499	redirection response is received. If a redirection should
				500	take place, return a new Request to allow http_error_30x to
				501	perform the redirect. Otherwise, raise HTTPError if no-one
				502	else should try to handle this url. Return None if you can't
				503	but another Handler might.
				504	"""
				505	m = req.get_method()
				506	if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
				507	or code in (301, 302, 303) and m == "POST")):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	508	raise HTTPError(req.get_full_url(), code, msg, headers, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	509
				510	# Strictly (according to RFC 2616), 301 or 302 in response to
				511	# a POST MUST NOT cause a redirection without confirmation
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	512	# from the user (of urllib.request, in this case). In practice,
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	513	# essentially all clients do redirect in this case, so we do
				514	# the same.
				515	# be conciliant with URIs containing a space
				516	newurl = newurl.replace(' ', '%20')
				517	CONTENT_HEADERS = ("content-length", "content-type")
				518	newheaders = dict((k, v) for k, v in req.headers.items()
				519	if k.lower() not in CONTENT_HEADERS)
				520	return Request(newurl,
				521	headers=newheaders,
				522	origin_req_host=req.get_origin_req_host(),
				523	unverifiable=True)
				524
				525	# Implementation note: To avoid the server sending us into an
				526	# infinite loop, the request object needs to track what URLs we
				527	# have already seen. Do this by adding a handler-specific
				528	# attribute to the Request object.
				529	def http_error_302(self, req, fp, code, msg, headers):
				530	# Some servers (incorrectly) return multiple Location headers
				531	# (so probably same goes for URI). Use first header.
				532	if "location" in headers:
				533	newurl = headers["location"]
				534	elif "uri" in headers:
				535	newurl = headers["uri"]
				536	else:
				537	return
Facundo Batista	f24802c	2008-08-17 03:36:03 +0000	[diff] [blame]	538
				539	# fix a possible malformed URL
				540	urlparts = urlparse(newurl)
				541	if not urlparts.path:
				542	urlparts = list(urlparts)
				543	urlparts[2] = "/"
				544	newurl = urlunparse(urlparts)
				545
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	546	newurl = urljoin(req.get_full_url(), newurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	547
				548	# XXX Probably want to forget about the state of the current
				549	# request, although that might interact poorly with other
				550	# handlers that also use handler-specific request attributes
				551	new = self.redirect_request(req, fp, code, msg, headers, newurl)
				552	if new is None:
				553	return
				554
				555	# loop detection
				556	# .redirect_dict has a key url if url was previously visited.
				557	if hasattr(req, 'redirect_dict'):
				558	visited = new.redirect_dict = req.redirect_dict
				559	if (visited.get(newurl, 0) >= self.max_repeats or
				560	len(visited) >= self.max_redirections):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	561	raise HTTPError(req.get_full_url(), code,
				562	self.inf_msg + msg, headers, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	563	else:
				564	visited = new.redirect_dict = req.redirect_dict = {}
				565	visited[newurl] = visited.get(newurl, 0) + 1
				566
				567	# Don't close the fp until we are sure that we won't use it
				568	# with HTTPError.
				569	fp.read()
				570	fp.close()
				571
				572	return self.parent.open(new)
				573
				574	http_error_301 = http_error_303 = http_error_307 = http_error_302
				575
				576	inf_msg = "The HTTP server returned a redirect error that would " \
				577	"lead to an infinite loop.\n" \
				578	"The last 30x error message was:\n"
				579
				580
				581	def _parse_proxy(proxy):
				582	"""Return (scheme, user, password, host/port) given a URL or an authority.
				583
				584	If a URL is supplied, it must have an authority (host:port) component.
				585	According to RFC 3986, having an authority component means the URL must
				586	have two slashes after the scheme:
				587
				588	>>> _parse_proxy('file:/ftp.example.com/')
				589	Traceback (most recent call last):
				590	ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
				591
				592	The first three items of the returned tuple may be None.
				593
				594	Examples of authority parsing:
				595
				596	>>> _parse_proxy('proxy.example.com')
				597	(None, None, None, 'proxy.example.com')
				598	>>> _parse_proxy('proxy.example.com:3128')
				599	(None, None, None, 'proxy.example.com:3128')
				600
				601	The authority component may optionally include userinfo (assumed to be
				602	username:password):
				603
				604	>>> _parse_proxy('joe:password@proxy.example.com')
				605	(None, 'joe', 'password', 'proxy.example.com')
				606	>>> _parse_proxy('joe:password@proxy.example.com:3128')
				607	(None, 'joe', 'password', 'proxy.example.com:3128')
				608
				609	Same examples, but with URLs instead:
				610
				611	>>> _parse_proxy('http://proxy.example.com/')
				612	('http', None, None, 'proxy.example.com')
				613	>>> _parse_proxy('http://proxy.example.com:3128/')
				614	('http', None, None, 'proxy.example.com:3128')
				615	>>> _parse_proxy('http://joe:password@proxy.example.com/')
				616	('http', 'joe', 'password', 'proxy.example.com')
				617	>>> _parse_proxy('http://joe:password@proxy.example.com:3128')
				618	('http', 'joe', 'password', 'proxy.example.com:3128')
				619
				620	Everything after the authority is ignored:
				621
				622	>>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
				623	('ftp', 'joe', 'password', 'proxy.example.com')
				624
				625	Test for no trailing '/' case:
				626
				627	>>> _parse_proxy('http://joe:password@proxy.example.com')
				628	('http', 'joe', 'password', 'proxy.example.com')
				629
				630	"""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	631	scheme, r_scheme = splittype(proxy)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	632	if not r_scheme.startswith("/"):
				633	# authority
				634	scheme = None
				635	authority = proxy
				636	else:
				637	# URL
				638	if not r_scheme.startswith("//"):
				639	raise ValueError("proxy URL with no authority: %r" % proxy)
				640	# We have an authority, so for RFC 3986-compliant URLs (by ss 3.
				641	# and 3.3.), path is empty or starts with '/'
				642	end = r_scheme.find("/", 2)
				643	if end == -1:
				644	end = None
				645	authority = r_scheme[2:end]
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	646	userinfo, hostport = splituser(authority)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	647	if userinfo is not None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	648	user, password = splitpasswd(userinfo)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	649	else:
				650	user = password = None
				651	return scheme, user, password, hostport
				652
				653	class ProxyHandler(BaseHandler):
				654	# Proxies must be in front
				655	handler_order = 100
				656
				657	def __init__(self, proxies=None):
				658	if proxies is None:
				659	proxies = getproxies()
				660	assert hasattr(proxies, 'keys'), "proxies must be a mapping"
				661	self.proxies = proxies
				662	for type, url in proxies.items():
				663	setattr(self, '%s_open' % type,
				664	lambda r, proxy=url, type=type, meth=self.proxy_open: \
				665	meth(r, proxy, type))
				666
				667	def proxy_open(self, req, proxy, type):
				668	orig_type = req.get_type()
				669	proxy_type, user, password, hostport = _parse_proxy(proxy)
				670	if proxy_type is None:
				671	proxy_type = orig_type
				672	if user and password:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	673	user_pass = '%s:%s' % (unquote(user),
				674	unquote(password))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	675	creds = base64.b64encode(user_pass.encode()).decode("ascii")
				676	req.add_header('Proxy-authorization', 'Basic ' + creds)
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	677	hostport = unquote(hostport)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	678	req.set_proxy(hostport, proxy_type)
				679	if orig_type == proxy_type:
				680	# let other handlers take care of it
				681	return None
				682	else:
				683	# need to start over, because the other handlers don't
				684	# grok the proxy's URL type
				685	# e.g. if we have a constructor arg proxies like so:
				686	# {'http': 'ftp://proxy.example.com'}, we may end up turning
				687	# a request for http://acme.example.com/a into one for
				688	# ftp://proxy.example.com/a
				689	return self.parent.open(req)
				690
				691	class HTTPPasswordMgr:
				692
				693	def __init__(self):
				694	self.passwd = {}
				695
				696	def add_password(self, realm, uri, user, passwd):
				697	# uri could be a single URI or a sequence
				698	if isinstance(uri, str):
				699	uri = [uri]
				700	if not realm in self.passwd:
				701	self.passwd[realm] = {}
				702	for default_port in True, False:
				703	reduced_uri = tuple(
				704	[self.reduce_uri(u, default_port) for u in uri])
				705	self.passwd[realm][reduced_uri] = (user, passwd)
				706
				707	def find_user_password(self, realm, authuri):
				708	domains = self.passwd.get(realm, {})
				709	for default_port in True, False:
				710	reduced_authuri = self.reduce_uri(authuri, default_port)
				711	for uris, authinfo in domains.items():
				712	for uri in uris:
				713	if self.is_suburi(uri, reduced_authuri):
				714	return authinfo
				715	return None, None
				716
				717	def reduce_uri(self, uri, default_port=True):
				718	"""Accept authority or URI and extract only the authority and path."""
				719	# note HTTP URLs do not have a userinfo component
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	720	parts = urlsplit(uri)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	721	if parts[1]:
				722	# URI
				723	scheme = parts[0]
				724	authority = parts[1]
				725	path = parts[2] or '/'
				726	else:
				727	# host or host:port
				728	scheme = None
				729	authority = uri
				730	path = '/'
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	731	host, port = splitport(authority)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	732	if default_port and port is None and scheme is not None:
				733	dport = {"http": 80,
				734	"https": 443,
				735	}.get(scheme)
				736	if dport is not None:
				737	authority = "%s:%d" % (host, dport)
				738	return authority, path
				739
				740	def is_suburi(self, base, test):
				741	"""Check if test is below base in a URI tree
				742
				743	Both args must be URIs in reduced form.
				744	"""
				745	if base == test:
				746	return True
				747	if base[0] != test[0]:
				748	return False
				749	common = posixpath.commonprefix((base[1], test[1]))
				750	if len(common) == len(base[1]):
				751	return True
				752	return False
				753
				754
				755	class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
				756
				757	def find_user_password(self, realm, authuri):
				758	user, password = HTTPPasswordMgr.find_user_password(self, realm,
				759	authuri)
				760	if user is not None:
				761	return user, password
				762	return HTTPPasswordMgr.find_user_password(self, None, authuri)
				763
				764
				765	class AbstractBasicAuthHandler:
				766
				767	# XXX this allows for multiple auth-schemes, but will stupidly pick
				768	# the last one with a realm specified.
				769
				770	# allow for double- and single-quoted realm values
				771	# (single quotes are a violation of the RFC, but appear in the wild)
				772	rx = re.compile('(?:.,)[ \t]*([^ \t]+)[ \t]+'
				773	'realm=(["\'])(.*?)\\2', re.I)
				774
				775	# XXX could pre-emptively send auth info already accepted (RFC 2617,
				776	# end of section 2, and section 1.2 immediately after "credentials"
				777	# production).
				778
				779	def __init__(self, password_mgr=None):
				780	if password_mgr is None:
				781	password_mgr = HTTPPasswordMgr()
				782	self.passwd = password_mgr
				783	self.add_password = self.passwd.add_password
				784
				785	def http_error_auth_reqed(self, authreq, host, req, headers):
				786	# host may be an authority (without userinfo) or a URL with an
				787	# authority
				788	# XXX could be multiple headers
				789	authreq = headers.get(authreq, None)
				790	if authreq:
				791	mo = AbstractBasicAuthHandler.rx.search(authreq)
				792	if mo:
				793	scheme, quote, realm = mo.groups()
				794	if scheme.lower() == 'basic':
				795	return self.retry_http_basic_auth(host, req, realm)
				796
				797	def retry_http_basic_auth(self, host, req, realm):
				798	user, pw = self.passwd.find_user_password(realm, host)
				799	if pw is not None:
				800	raw = "%s:%s" % (user, pw)
				801	auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
				802	if req.headers.get(self.auth_header, None) == auth:
				803	return None
				804	req.add_header(self.auth_header, auth)
				805	return self.parent.open(req)
				806	else:
				807	return None
				808
				809
				810	class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
				811
				812	auth_header = 'Authorization'
				813
				814	def http_error_401(self, req, fp, code, msg, headers):
				815	url = req.get_full_url()
				816	return self.http_error_auth_reqed('www-authenticate',
				817	url, req, headers)
				818
				819
				820	class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
				821
				822	auth_header = 'Proxy-authorization'
				823
				824	def http_error_407(self, req, fp, code, msg, headers):
				825	# http_error_auth_reqed requires that there is no userinfo component in
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	826	# authority. Assume there isn't one, since urllib.request does not (and
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	827	# should not, RFC 3986 s. 3.2.1) support requests for URLs containing
				828	# userinfo.
				829	authority = req.get_host()
				830	return self.http_error_auth_reqed('proxy-authenticate',
				831	authority, req, headers)
				832
				833
				834	def randombytes(n):
				835	"""Return n random bytes."""
				836	return os.urandom(n)
				837
				838	class AbstractDigestAuthHandler:
				839	# Digest authentication is specified in RFC 2617.
				840
				841	# XXX The client does not inspect the Authentication-Info header
				842	# in a successful response.
				843
				844	# XXX It should be possible to test this implementation against
				845	# a mock server that just generates a static set of challenges.
				846
				847	# XXX qop="auth-int" supports is shaky
				848
				849	def __init__(self, passwd=None):
				850	if passwd is None:
				851	passwd = HTTPPasswordMgr()
				852	self.passwd = passwd
				853	self.add_password = self.passwd.add_password
				854	self.retried = 0
				855	self.nonce_count = 0
				856
				857	def reset_retry_count(self):
				858	self.retried = 0
				859
				860	def http_error_auth_reqed(self, auth_header, host, req, headers):
				861	authreq = headers.get(auth_header, None)
				862	if self.retried > 5:
				863	# Don't fail endlessly - if we failed once, we'll probably
				864	# fail a second time. Hm. Unless the Password Manager is
				865	# prompting for the information. Crap. This isn't great
				866	# but it's better than the current 'repeat until recursion
				867	# depth exceeded' approach <wink>
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	868	raise HTTPError(req.get_full_url(), 401, "digest auth failed",
				869	headers, None)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	870	else:
				871	self.retried += 1
				872	if authreq:
				873	scheme = authreq.split()[0]
				874	if scheme.lower() == 'digest':
				875	return self.retry_http_digest_auth(req, authreq)
				876
				877	def retry_http_digest_auth(self, req, auth):
				878	token, challenge = auth.split(' ', 1)
				879	chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
				880	auth = self.get_authorization(req, chal)
				881	if auth:
				882	auth_val = 'Digest %s' % auth
				883	if req.headers.get(self.auth_header, None) == auth_val:
				884	return None
				885	req.add_unredirected_header(self.auth_header, auth_val)
				886	resp = self.parent.open(req)
				887	return resp
				888
				889	def get_cnonce(self, nonce):
				890	# The cnonce-value is an opaque
				891	# quoted string value provided by the client and used by both client
				892	# and server to avoid chosen plaintext attacks, to provide mutual
				893	# authentication, and to provide some message integrity protection.
				894	# This isn't a fabulous effort, but it's probably Good Enough.
				895	s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
				896	b = s.encode("ascii") + randombytes(8)
				897	dig = hashlib.sha1(b).hexdigest()
				898	return dig[:16]
				899
				900	def get_authorization(self, req, chal):
				901	try:
				902	realm = chal['realm']
				903	nonce = chal['nonce']
				904	qop = chal.get('qop')
				905	algorithm = chal.get('algorithm', 'MD5')
				906	# mod_digest doesn't send an opaque, even though it isn't
				907	# supposed to be optional
				908	opaque = chal.get('opaque', None)
				909	except KeyError:
				910	return None
				911
				912	H, KD = self.get_algorithm_impls(algorithm)
				913	if H is None:
				914	return None
				915
				916	user, pw = self.passwd.find_user_password(realm, req.get_full_url())
				917	if user is None:
				918	return None
				919
				920	# XXX not implemented yet
				921	if req.has_data():
				922	entdig = self.get_entity_digest(req.get_data(), chal)
				923	else:
				924	entdig = None
				925
				926	A1 = "%s:%s:%s" % (user, realm, pw)
				927	A2 = "%s:%s" % (req.get_method(),
				928	# XXX selector: what about proxies and full urls
				929	req.get_selector())
				930	if qop == 'auth':
				931	self.nonce_count += 1
				932	ncvalue = '%08x' % self.nonce_count
				933	cnonce = self.get_cnonce(nonce)
				934	noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
				935	respdig = KD(H(A1), noncebit)
				936	elif qop is None:
				937	respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
				938	else:
				939	# XXX handle auth-int.
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	940	raise URLError("qop '%s' is not supported." % qop)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	941
				942	# XXX should the partial digests be encoded too?
				943
				944	base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
				945	'response="%s"' % (user, realm, nonce, req.get_selector(),
				946	respdig)
				947	if opaque:
				948	base += ', opaque="%s"' % opaque
				949	if entdig:
				950	base += ', digest="%s"' % entdig
				951	base += ', algorithm="%s"' % algorithm
				952	if qop:
				953	base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
				954	return base
				955
				956	def get_algorithm_impls(self, algorithm):
				957	# lambdas assume digest modules are imported at the top level
				958	if algorithm == 'MD5':
				959	H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
				960	elif algorithm == 'SHA':
				961	H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
				962	# XXX MD5-sess
				963	KD = lambda s, d: H("%s:%s" % (s, d))
				964	return H, KD
				965
				966	def get_entity_digest(self, data, chal):
				967	# XXX not implemented yet
				968	return None
				969
				970
				971	class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
				972	"""An authentication protocol defined by RFC 2069
				973
				974	Digest authentication improves on basic authentication because it
				975	does not transmit passwords in the clear.
				976	"""
				977
				978	auth_header = 'Authorization'
				979	handler_order = 490 # before Basic auth
				980
				981	def http_error_401(self, req, fp, code, msg, headers):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	982	host = urlparse(req.get_full_url())[1]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	983	retry = self.http_error_auth_reqed('www-authenticate',
				984	host, req, headers)
				985	self.reset_retry_count()
				986	return retry
				987
				988
				989	class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
				990
				991	auth_header = 'Proxy-Authorization'
				992	handler_order = 490 # before Basic auth
				993
				994	def http_error_407(self, req, fp, code, msg, headers):
				995	host = req.get_host()
				996	retry = self.http_error_auth_reqed('proxy-authenticate',
				997	host, req, headers)
				998	self.reset_retry_count()
				999	return retry
				1000
				1001	class AbstractHTTPHandler(BaseHandler):
				1002
				1003	def __init__(self, debuglevel=0):
				1004	self._debuglevel = debuglevel
				1005
				1006	def set_http_debuglevel(self, level):
				1007	self._debuglevel = level
				1008
				1009	def do_request_(self, request):
				1010	host = request.get_host()
				1011	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1012	raise URLError('no host given')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1013
				1014	if request.has_data(): # POST
				1015	data = request.get_data()
				1016	if not request.has_header('Content-type'):
				1017	request.add_unredirected_header(
				1018	'Content-type',
				1019	'application/x-www-form-urlencoded')
				1020	if not request.has_header('Content-length'):
				1021	request.add_unredirected_header(
				1022	'Content-length', '%d' % len(data))
				1023
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame]	1024	sel_host = host
				1025	if request.has_proxy():
				1026	scheme, sel = splittype(request.get_selector())
				1027	sel_host, sel_path = splithost(sel)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1028	if not request.has_header('Host'):
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame]	1029	request.add_unredirected_header('Host', sel_host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1030	for name, value in self.parent.addheaders:
				1031	name = name.capitalize()
				1032	if not request.has_header(name):
				1033	request.add_unredirected_header(name, value)
				1034
				1035	return request
				1036
				1037	def do_open(self, http_class, req):
				1038	"""Return an addinfourl object for the request, using http_class.
				1039
				1040	http_class must implement the HTTPConnection API from http.client.
				1041	The addinfourl return value is a file-like object. It also
				1042	has methods and attributes including:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1043	- info(): return a email Message object for the headers
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1044	- geturl(): return the original request URL
				1045	- code: HTTP status code
				1046	"""
				1047	host = req.get_host()
				1048	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1049	raise URLError('no host given')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1050
				1051	h = http_class(host, timeout=req.timeout) # will parse host:port
				1052	headers = dict(req.headers)
				1053	headers.update(req.unredirected_hdrs)
				1054
				1055	# TODO(jhylton): Should this be redesigned to handle
				1056	# persistent connections?
				1057
				1058	# We want to make an HTTP/1.1 request, but the addinfourl
				1059	# class isn't prepared to deal with a persistent connection.
				1060	# It will try to read all remaining data from the socket,
				1061	# which will block while the server waits for the next request.
				1062	# So make sure the connection gets closed after the (only)
				1063	# request.
				1064	headers["Connection"] = "close"
				1065	headers = dict(
				1066	(name.title(), val) for name, val in headers.items())
				1067	try:
				1068	h.request(req.get_method(), req.get_selector(), req.data, headers)
				1069	r = h.getresponse()
				1070	except socket.error as err: # XXX what error?
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1071	raise URLError(err)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1072
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1073	resp = addinfourl(r.fp, r.msg, req.get_full_url())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1074	resp.code = r.status
				1075	resp.msg = r.reason
				1076	return resp
				1077
				1078
				1079	class HTTPHandler(AbstractHTTPHandler):
				1080
				1081	def http_open(self, req):
				1082	return self.do_open(http.client.HTTPConnection, req)
				1083
				1084	http_request = AbstractHTTPHandler.do_request_
				1085
				1086	if hasattr(http.client, 'HTTPSConnection'):
				1087	class HTTPSHandler(AbstractHTTPHandler):
				1088
				1089	def https_open(self, req):
				1090	return self.do_open(http.client.HTTPSConnection, req)
				1091
				1092	https_request = AbstractHTTPHandler.do_request_
				1093
				1094	class HTTPCookieProcessor(BaseHandler):
				1095	def __init__(self, cookiejar=None):
				1096	import http.cookiejar
				1097	if cookiejar is None:
				1098	cookiejar = http.cookiejar.CookieJar()
				1099	self.cookiejar = cookiejar
				1100
				1101	def http_request(self, request):
				1102	self.cookiejar.add_cookie_header(request)
				1103	return request
				1104
				1105	def http_response(self, request, response):
				1106	self.cookiejar.extract_cookies(response, request)
				1107	return response
				1108
				1109	https_request = http_request
				1110	https_response = http_response
				1111
				1112	class UnknownHandler(BaseHandler):
				1113	def unknown_open(self, req):
				1114	type = req.get_type()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1115	raise URLError('unknown url type: %s' % type)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1116
				1117	def parse_keqv_list(l):
				1118	"""Parse list of key=value strings where keys are not duplicated."""
				1119	parsed = {}
				1120	for elt in l:
				1121	k, v = elt.split('=', 1)
				1122	if v[0] == '"' and v[-1] == '"':
				1123	v = v[1:-1]
				1124	parsed[k] = v
				1125	return parsed
				1126
				1127	def parse_http_list(s):
				1128	"""Parse lists as described by RFC 2068 Section 2.
				1129
				1130	In particular, parse comma-separated lists where the elements of
				1131	the list may include quoted-strings. A quoted-string could
				1132	contain a comma. A non-quoted string could have quotes in the
				1133	middle. Neither commas nor quotes count if they are escaped.
				1134	Only double-quotes count, not single-quotes.
				1135	"""
				1136	res = []
				1137	part = ''
				1138
				1139	escape = quote = False
				1140	for cur in s:
				1141	if escape:
				1142	part += cur
				1143	escape = False
				1144	continue
				1145	if quote:
				1146	if cur == '\\':
				1147	escape = True
				1148	continue
				1149	elif cur == '"':
				1150	quote = False
				1151	part += cur
				1152	continue
				1153
				1154	if cur == ',':
				1155	res.append(part)
				1156	part = ''
				1157	continue
				1158
				1159	if cur == '"':
				1160	quote = True
				1161
				1162	part += cur
				1163
				1164	# append last part
				1165	if part:
				1166	res.append(part)
				1167
				1168	return [part.strip() for part in res]
				1169
				1170	class FileHandler(BaseHandler):
				1171	# Use local file or FTP depending on form of URL
				1172	def file_open(self, req):
				1173	url = req.get_selector()
				1174	if url[:2] == '//' and url[2:3] != '/':
				1175	req.type = 'ftp'
				1176	return self.parent.open(req)
				1177	else:
				1178	return self.open_local_file(req)
				1179
				1180	# names for the localhost
				1181	names = None
				1182	def get_names(self):
				1183	if FileHandler.names is None:
				1184	try:
				1185	FileHandler.names = (socket.gethostbyname('localhost'),
				1186	socket.gethostbyname(socket.gethostname()))
				1187	except socket.gaierror:
				1188	FileHandler.names = (socket.gethostbyname('localhost'),)
				1189	return FileHandler.names
				1190
				1191	# not entirely sure what the rules are here
				1192	def open_local_file(self, req):
				1193	import email.utils
				1194	import mimetypes
				1195	host = req.get_host()
				1196	file = req.get_selector()
				1197	localfile = url2pathname(file)
				1198	try:
				1199	stats = os.stat(localfile)
				1200	size = stats.st_size
				1201	modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
				1202	mtype = mimetypes.guess_type(file)[0]
				1203	headers = email.message_from_string(
				1204	'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
				1205	(mtype or 'text/plain', size, modified))
				1206	if host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1207	host, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1208	if not host or \
				1209	(not port and _safe_gethostbyname(host) in self.get_names()):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1210	return addinfourl(open(localfile, 'rb'), headers, 'file:'+file)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1211	except OSError as msg:
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	1212	# users shouldn't expect OSErrors coming from urlopen()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1213	raise URLError(msg)
				1214	raise URLError('file not on local host')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1215
				1216	def _safe_gethostbyname(host):
				1217	try:
				1218	return socket.gethostbyname(host)
				1219	except socket.gaierror:
				1220	return None
				1221
				1222	class FTPHandler(BaseHandler):
				1223	def ftp_open(self, req):
				1224	import ftplib
				1225	import mimetypes
				1226	host = req.get_host()
				1227	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1228	raise URLError('ftp error: no host given')
				1229	host, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1230	if port is None:
				1231	port = ftplib.FTP_PORT
				1232	else:
				1233	port = int(port)
				1234
				1235	# username/password handling
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1236	user, host = splituser(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1237	if user:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1238	user, passwd = splitpasswd(user)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1239	else:
				1240	passwd = None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1241	host = unquote(host)
				1242	user = unquote(user or '')
				1243	passwd = unquote(passwd or '')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1244
				1245	try:
				1246	host = socket.gethostbyname(host)
				1247	except socket.error as msg:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1248	raise URLError(msg)
				1249	path, attrs = splitattr(req.get_selector())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1250	dirs = path.split('/')
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1251	dirs = list(map(unquote, dirs))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1252	dirs, file = dirs[:-1], dirs[-1]
				1253	if dirs and not dirs[0]:
				1254	dirs = dirs[1:]
				1255	try:
				1256	fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
				1257	type = file and 'I' or 'D'
				1258	for attr in attrs:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1259	attr, value = splitvalue(attr)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1260	if attr.lower() == 'type' and \
				1261	value in ('a', 'A', 'i', 'I', 'd', 'D'):
				1262	type = value.upper()
				1263	fp, retrlen = fw.retrfile(file, type)
				1264	headers = ""
				1265	mtype = mimetypes.guess_type(req.get_full_url())[0]
				1266	if mtype:
				1267	headers += "Content-type: %s\n" % mtype
				1268	if retrlen is not None and retrlen >= 0:
				1269	headers += "Content-length: %d\n" % retrlen
				1270	headers = email.message_from_string(headers)
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1271	return addinfourl(fp, headers, req.get_full_url())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1272	except ftplib.all_errors as msg:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1273	exc = URLError('ftp error: %s' % msg)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1274	raise exc.with_traceback(sys.exc_info()[2])
				1275
				1276	def connect_ftp(self, user, passwd, host, port, dirs, timeout):
				1277	fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
				1278	return fw
				1279
				1280	class CacheFTPHandler(FTPHandler):
				1281	# XXX would be nice to have pluggable cache strategies
				1282	# XXX this stuff is definitely not thread safe
				1283	def __init__(self):
				1284	self.cache = {}
				1285	self.timeout = {}
				1286	self.soonest = 0
				1287	self.delay = 60
				1288	self.max_conns = 16
				1289
				1290	def setTimeout(self, t):
				1291	self.delay = t
				1292
				1293	def setMaxConns(self, m):
				1294	self.max_conns = m
				1295
				1296	def connect_ftp(self, user, passwd, host, port, dirs, timeout):
				1297	key = user, host, port, '/'.join(dirs), timeout
				1298	if key in self.cache:
				1299	self.timeout[key] = time.time() + self.delay
				1300	else:
				1301	self.cache[key] = ftpwrapper(user, passwd, host, port,
				1302	dirs, timeout)
				1303	self.timeout[key] = time.time() + self.delay
				1304	self.check_cache()
				1305	return self.cache[key]
				1306
				1307	def check_cache(self):
				1308	# first check for old ones
				1309	t = time.time()
				1310	if self.soonest <= t:
				1311	for k, v in list(self.timeout.items()):
				1312	if v < t:
				1313	self.cache[k].close()
				1314	del self.cache[k]
				1315	del self.timeout[k]
				1316	self.soonest = min(list(self.timeout.values()))
				1317
				1318	# then check the size
				1319	if len(self.cache) == self.max_conns:
				1320	for k, v in list(self.timeout.items()):
				1321	if v == self.soonest:
				1322	del self.cache[k]
				1323	del self.timeout[k]
				1324	break
				1325	self.soonest = min(list(self.timeout.values()))
				1326
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems: pick the url2pathname/pathname2url pair
# matching os.name, falling back to plain unquote/quote otherwise.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Module-level dict that URLopener instances assign to self.ftpcache.
ftpcache = {}
				1355	class URLopener:
				1356	"""Class to open URLs.
				1357	This is a class rather than just a subroutine because we may need
				1358	more than one set of global protocol-specific options.
				1359	Note -- this is a base class for those who don't want the
				1360	automatic handling of errors type 302 (relocated) and 401
				1361	(authorization needed)."""
				1362
				1363	__tempfiles = None
				1364
				1365	version = "Python-urllib/%s" % __version__
				1366
				1367	# Constructor
				1368	def __init__(self, proxies=None, **x509):
				1369	if proxies is None:
				1370	proxies = getproxies()
				1371	assert hasattr(proxies, 'keys'), "proxies must be a mapping"
				1372	self.proxies = proxies
				1373	self.key_file = x509.get('key_file')
				1374	self.cert_file = x509.get('cert_file')
				1375	self.addheaders = [('User-Agent', self.version)]
				1376	self.__tempfiles = []
				1377	self.__unlink = os.unlink # See cleanup()
				1378	self.tempcache = None
				1379	# Undocumented feature: if you assign {} to tempcache,
				1380	# it is used to cache files retrieved with
				1381	# self.retrieve(). This is not enabled by default
				1382	# since it does not work for changing documents (and I
				1383	# haven't got the logic to check expiration headers
				1384	# yet).
				1385	self.ftpcache = ftpcache
				1386	# Undocumented feature: you can use a different
				1387	# ftp cache by assigning to the .ftpcache member;
				1388	# in case you want logically independent URL openers
				1389	# XXX This is not threadsafe. Bah.
				1390
def __del__(self):
    """Call close() when the opener is finalized."""
    self.close()
				1393
def close(self):
    """Close the opener; this simply calls cleanup()."""
    self.cleanup()
				1396
def cleanup(self):
    """Delete the recorded temporary files and empty the temp cache.

    Files that cannot be removed (OSError) are silently skipped.
    """
    # This code sometimes runs when the rest of this module
    # has already been deleted, so it can't use any globals
    # or import anything.
    if self.__tempfiles:
        for file in self.__tempfiles:
            try:
                # Uses the unlink reference stored on the instance,
                # not the os module global.
                self.__unlink(file)
            except OSError:
                pass
        # Empty the list in place.
        del self.__tempfiles[:]
    if self.tempcache:
        self.tempcache.clear()
				1410
def addheader(self, *args):
    """Add a header to be used by the HTTP interface only
    e.g. u.addheader('Accept', 'sound/basic')"""
    # The positional arguments are appended as one tuple.
    self.addheaders.append(args)
				1415
				1416	# External interface
				1417	def open(self, fullurl, data=None):
				1418	"""Use URLopener().open(file) instead of open(file, 'r')."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1419	fullurl = unwrap(to_bytes(fullurl))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1420	if self.tempcache and fullurl in self.tempcache:
				1421	filename, headers = self.tempcache[fullurl]
				1422	fp = open(filename, 'rb')
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1423	return addinfourl(fp, headers, fullurl)
				1424	urltype, url = splittype(fullurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1425	if not urltype:
				1426	urltype = 'file'
				1427	if urltype in self.proxies:
				1428	proxy = self.proxies[urltype]
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1429	urltype, proxyhost = splittype(proxy)
				1430	host, selector = splithost(proxyhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1431	url = (host, fullurl) # Signal special case to open_*()
				1432	else:
				1433	proxy = None
				1434	name = 'open_' + urltype
				1435	self.type = urltype
				1436	name = name.replace('-', '_')
				1437	if not hasattr(self, name):
				1438	if proxy:
				1439	return self.open_unknown_proxy(proxy, fullurl, data)
				1440	else:
				1441	return self.open_unknown(fullurl, data)
				1442	try:
				1443	if data is None:
				1444	return getattr(self, name)(url)
				1445	else:
				1446	return getattr(self, name)(url, data)
				1447	except socket.error as msg:
				1448	raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
				1449
				1450	def open_unknown(self, fullurl, data=None):
				1451	"""Overridable interface to open unknown URL type."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1452	type, url = splittype(fullurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1453	raise IOError('url error', 'unknown url type', type)
				1454
				1455	def open_unknown_proxy(self, proxy, fullurl, data=None):
				1456	"""Overridable interface to open unknown URL type."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1457	type, url = splittype(fullurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1458	raise IOError('url error', 'invalid proxy for %s' % type, proxy)
				1459
				1460	# External interface
				1461	def retrieve(self, url, filename=None, reporthook=None, data=None):
				1462	"""retrieve(url) returns (filename, headers) for a local object
				1463	or (tempfilename, headers) for a remote object."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1464	url = unwrap(to_bytes(url))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1465	if self.tempcache and url in self.tempcache:
				1466	return self.tempcache[url]
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1467	type, url1 = splittype(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1468	if filename is None and (not type or type == 'file'):
				1469	try:
				1470	fp = self.open_local_file(url1)
				1471	hdrs = fp.info()
				1472	del fp
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1473	return url2pathname(splithost(url1)[1]), hdrs
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1474	except IOError as msg:
				1475	pass
				1476	fp = self.open(url, data)
				1477	headers = fp.info()
				1478	if filename:
				1479	tfp = open(filename, 'wb')
				1480	else:
				1481	import tempfile
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1482	garbage, path = splittype(url)
				1483	garbage, path = splithost(path or "")
				1484	path, garbage = splitquery(path or "")
				1485	path, garbage = splitattr(path or "")
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1486	suffix = os.path.splitext(path)[1]
				1487	(fd, filename) = tempfile.mkstemp(suffix)
				1488	self.__tempfiles.append(filename)
				1489	tfp = os.fdopen(fd, 'wb')
				1490	result = filename, headers
				1491	if self.tempcache is not None:
				1492	self.tempcache[url] = result
				1493	bs = 1024*8
				1494	size = -1
				1495	read = 0
				1496	blocknum = 0
				1497	if reporthook:
				1498	if "content-length" in headers:
				1499	size = int(headers["Content-Length"])
				1500	reporthook(blocknum, bs, size)
				1501	while 1:
				1502	block = fp.read(bs)
				1503	if not block:
				1504	break
				1505	read += len(block)
				1506	tfp.write(block)
				1507	blocknum += 1
				1508	if reporthook:
				1509	reporthook(blocknum, bs, size)
				1510	fp.close()
				1511	tfp.close()
				1512	del fp
				1513	del tfp
				1514
				1515	# raise exception if actual size does not match content-length header
				1516	if size >= 0 and read < size:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1517	raise ContentTooShortError(
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1518	"retrieval incomplete: got only %i out of %i bytes"
				1519	% (read, size), result)
				1520
				1521	return result
				1522
				1523	# Each method named open_<type> knows how to open that type of URL
				1524
				1525	def _open_generic_http(self, connection_factory, url, data):
				1526	"""Make an HTTP connection using connection_class.
				1527
				1528	This is an internal method that should be called from
				1529	open_http() or open_https().
				1530
				1531	Arguments:
				1532	- connection_factory should take a host name and return an
				1533	HTTPConnection instance.
				1534	- url is the url to retrieval or a host, relative-path pair.
				1535	- data is payload for a POST request or None.
				1536	"""
				1537
				1538	user_passwd = None
				1539	proxy_passwd= None
				1540	if isinstance(url, str):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1541	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1542	if host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1543	user_passwd, host = splituser(host)
				1544	host = unquote(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1545	realhost = host
				1546	else:
				1547	host, selector = url
				1548	# check whether the proxy contains authorization information
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1549	proxy_passwd, host = splituser(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1550	# now we proceed with the url we want to obtain
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1551	urltype, rest = splittype(selector)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1552	url = rest
				1553	user_passwd = None
				1554	if urltype.lower() != 'http':
				1555	realhost = None
				1556	else:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1557	realhost, rest = splithost(rest)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1558	if realhost:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1559	user_passwd, realhost = splituser(realhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1560	if user_passwd:
				1561	selector = "%s://%s%s" % (urltype, realhost, rest)
				1562	if proxy_bypass(realhost):
				1563	host = realhost
				1564
				1565	#print "proxy via http:", host, selector
				1566	if not host: raise IOError('http error', 'no host given')
				1567
				1568	if proxy_passwd:
				1569	import base64
				1570	proxy_auth = base64.b64encode(proxy_passwd).strip()
				1571	else:
				1572	proxy_auth = None
				1573
				1574	if user_passwd:
				1575	import base64
				1576	auth = base64.b64encode(user_passwd).strip()
				1577	else:
				1578	auth = None
				1579	http_conn = connection_factory(host)
				1580	# XXX We should fix urllib so that it works with HTTP/1.1.
				1581	http_conn._http_vsn = 10
				1582	http_conn._http_vsn_str = "HTTP/1.0"
				1583
				1584	headers = {}
				1585	if proxy_auth:
				1586	headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
				1587	if auth:
				1588	headers["Authorization"] = "Basic %s" % auth
				1589	if realhost:
				1590	headers["Host"] = realhost
				1591	for header, value in self.addheaders:
				1592	headers[header] = value
				1593
				1594	if data is not None:
				1595	headers["Content-Type"] = "application/x-www-form-urlencoded"
				1596	http_conn.request("POST", selector, data, headers)
				1597	else:
				1598	http_conn.request("GET", selector, headers=headers)
				1599
				1600	try:
				1601	response = http_conn.getresponse()
				1602	except http.client.BadStatusLine:
				1603	# something went wrong with the HTTP status line
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1604	raise URLError("http protocol error: bad status line")
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1605
				1606	# According to RFC 2616, "2xx" code indicates that the client's
				1607	# request was successfully received, understood, and accepted.
				1608	if 200 <= response.status < 300:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1609	return addinfourl(response.fp, response.msg, "http:" + url,
				1610	response.status)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1611	else:
				1612	return self.http_error(
				1613	url, response.fp,
				1614	response.status, response.reason, response.msg, data)
				1615
    def open_http(self, url, data=None):
        """Open *url* over plain HTTP; *data*, when given, is sent as a POST body."""
        make_connection = http.client.HTTPConnection
        return self._open_generic_http(make_connection, url, data)
				1619
    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Dispatch an HTTP error response to the matching handler.

        A method named http_error_DDD (DDD being the numeric status code)
        is tried first; derived classes may provide such methods or
        override this one.  If no such method exists, or it returns a
        false value, http_error_default() handles the response.
        """
        absent = object()
        specific = getattr(self, 'http_error_%d' % errcode, absent)
        if specific is not absent:
            # The POST payload is only forwarded when there is one.
            extra = () if data is None else (data,)
            outcome = specific(url, fp, errcode, errmsg, headers, *extra)
            if outcome:
                return outcome
        return self.http_error_default(url, fp, errcode, errmsg, headers)
				1635
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: drain and close the response, then raise HTTPError."""
        fp.read()   # consume the error body; its content is not used
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1641
    if _have_ssl:
        def _https_connection(self, host):
            """Create an HTTPSConnection to *host* using this opener's key and certificate files."""
            tls_files = {'key_file': self.key_file,
                         'cert_file': self.cert_file}
            return http.client.HTTPSConnection(host, **tls_files)

        def open_https(self, url, data=None):
            """Open *url* over HTTPS; *data*, when given, is sent as a POST body."""
            make_connection = self._https_connection
            return self._open_generic_http(make_connection, url, data)
				1651
    def open_file(self, url):
        """Open a file: URL, delegating to FTP when it names a remote host.

        A URL of the form //host/... whose host is not 'localhost' is
        passed to open_ftp(); everything else goes to open_local_file().
        Raises URLError when *url* is not a string (proxied request).
        """
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        names_remote_host = (url[:2] == '//'
                             and url[2:3] != '/'
                             and url[2:12].lower() != 'localhost/')
        opener = self.open_ftp if names_remote_host else self.open_local_file
        return opener(url)
				1660
    def open_local_file(self, url):
        """Open a file on the local filesystem.

        *url* is the part of a file: URL after the scheme, optionally
        starting with //host.  The returned addinfourl object wraps the
        file opened in binary mode and carries Content-Type,
        Content-Length and Last-modified headers derived from the file.

        Raises URLError when the file cannot be stat'ed, or when a host
        is given that has a port or does not resolve to this machine.
        """
        import email.utils
        import mimetypes

        host, path = splithost(url)
        localname = url2pathname(path)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # URLError takes a reason plus an optional filename; the
            # previous three-argument call failed with TypeError instead
            # of reporting the underlying problem.
            raise URLError(e.strerror, e.filename) from e
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if host:
            host, port = splitport(host)
            on_this_machine = (
                not port
                and socket.gethostbyname(host) in (localhost(), thishost()))
            if not on_this_machine:
                raise URLError('local file error', 'not on local host')
        # Absolute paths are reported back as file:// URLs.
        urlfile = 'file://' + path if path[:1] == '/' else path
        return addinfourl(open(localname, 'rb'), headers, urlfile)
				1690
    def open_ftp(self, url):
        """Open an ftp: URL and return an addinfourl object for it.

        *url* is the part of the URL after the scheme.  Connections are
        cached in self.ftpcache, keyed on (user, host, port, directory).
        The transfer type defaults to 'D' when the path has no final
        component and to 'I' otherwise; a ';type=' attribute with one of
        a, i or d (either case) overrides it.

        Raises URLError when *url* is not a string (proxied request),
        when no host is given, or when the FTP layer reports an error.
        """
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import ftplib
        import mimetypes

        host, path = splithost(url)
        if not host:
            raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        port = int(port) if port else ftplib.FTP_PORT

        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, filename = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        if dirs and not dirs[0]:
            dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)

        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a snapshot
            # of the keys: deleting from a dict while iterating its live
            # key view raises RuntimeError.
            for stale_key in list(self.ftpcache):
                if stale_key != key:
                    stale = self.ftpcache.pop(stale_key)
                    stale.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    transfer_type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(filename,
                                                        transfer_type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
				1749
    def open_data(self, url, data=None):
        """Open a "data" URL and return an addinfourl object for it.

        *url* is the part of the URL after the scheme; any POSTed *data*
        is ignored.  The media type defaults to
        'text/plain;charset=US-ASCII'.  The payload is base64-decoded
        when the media type ends in ';base64' and percent-decoded
        otherwise.

        Raises URLError when *url* is not a string (proxied request) and
        IOError when the URL contains no comma.
        """
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [mediatype, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'
        semi = mediatype.rfind(';')
        if semi >= 0 and '=' not in mediatype[semi:]:
            # A trailing ";token" without "=" names the transfer encoding.
            encoding = mediatype[semi+1:]
            mediatype = mediatype[:semi]
        else:
            encoding = ''
        msg = []
        # %H:%M:%S is spelled out because the %T shorthand is not
        # available in every platform's strftime().
        msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                              time.gmtime(time.time())))
        msg.append('Content-type: %s' % mediatype)
        if encoding == 'base64':
            import base64
            # The base64 codec works on bytes; latin-1 maps each decoded
            # byte to exactly one character so nothing is lost.
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1790
				1791
				1792	class FancyURLopener(URLopener):
				1793	"""Derived class with handlers for errors we can handle (perhaps)."""
				1794
				1795	def __init__(self, args, *kwargs):
				1796	URLopener.__init__(self, args, *kwargs)
				1797	self.auth_cache = {}
				1798	self.tries = 0
				1799	self.maxtries = 10
				1800
				1801	def http_error_default(self, url, fp, errcode, errmsg, headers):
				1802	"""Default error handling -- don't raise an exception."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1803	return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1804
				1805	def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
				1806	"""Error 302 -- relocated (temporarily)."""
				1807	self.tries += 1
				1808	if self.maxtries and self.tries >= self.maxtries:
				1809	if hasattr(self, "http_error_500"):
				1810	meth = self.http_error_500
				1811	else:
				1812	meth = self.http_error_default
				1813	self.tries = 0
				1814	return meth(url, fp, 500,
				1815	"Internal Server Error: Redirect Recursion", headers)
				1816	result = self.redirect_internal(url, fp, errcode, errmsg, headers,
				1817	data)
				1818	self.tries = 0
				1819	return result
				1820
				1821	def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
				1822	if 'location' in headers:
				1823	newurl = headers['location']
				1824	elif 'uri' in headers:
				1825	newurl = headers['uri']
				1826	else:
				1827	return
				1828	void = fp.read()
				1829	fp.close()
				1830	# In case the server sent a relative URL, join with original:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1831	newurl = urljoin(self.type + ":" + url, newurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1832	return self.open(newurl)
				1833
				1834	def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
				1835	"""Error 301 -- also relocated (permanently)."""
				1836	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1837
				1838	def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
				1839	"""Error 303 -- also relocated (essentially identical to 302)."""
				1840	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1841
				1842	def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
				1843	"""Error 307 -- relocated, but turn POST into error."""
				1844	if data is None:
				1845	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1846	else:
				1847	return self.http_error_default(url, fp, errcode, errmsg, headers)
				1848
				1849	def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
				1850	"""Error 401 -- authentication required.
				1851	This function supports Basic authentication only."""
				1852	if not 'www-authenticate' in headers:
				1853	URLopener.http_error_default(self, url, fp,
				1854	errcode, errmsg, headers)
				1855	stuff = headers['www-authenticate']
				1856	import re
				1857	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
				1858	if not match:
				1859	URLopener.http_error_default(self, url, fp,
				1860	errcode, errmsg, headers)
				1861	scheme, realm = match.groups()
				1862	if scheme.lower() != 'basic':
				1863	URLopener.http_error_default(self, url, fp,
				1864	errcode, errmsg, headers)
				1865	name = 'retry_' + self.type + '_basic_auth'
				1866	if data is None:
				1867	return getattr(self,name)(url, realm)
				1868	else:
				1869	return getattr(self,name)(url, realm, data)
				1870
				1871	def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
				1872	"""Error 407 -- proxy authentication required.
				1873	This function supports Basic authentication only."""
				1874	if not 'proxy-authenticate' in headers:
				1875	URLopener.http_error_default(self, url, fp,
				1876	errcode, errmsg, headers)
				1877	stuff = headers['proxy-authenticate']
				1878	import re
				1879	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
				1880	if not match:
				1881	URLopener.http_error_default(self, url, fp,
				1882	errcode, errmsg, headers)
				1883	scheme, realm = match.groups()
				1884	if scheme.lower() != 'basic':
				1885	URLopener.http_error_default(self, url, fp,
				1886	errcode, errmsg, headers)
				1887	name = 'retry_proxy_' + self.type + '_basic_auth'
				1888	if data is None:
				1889	return getattr(self,name)(url, realm)
				1890	else:
				1891	return getattr(self,name)(url, realm, data)
				1892
				1893	def retry_proxy_http_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1894	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1895	newurl = 'http://' + host + selector
				1896	proxy = self.proxies['http']
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1897	urltype, proxyhost = splittype(proxy)
				1898	proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1899	i = proxyhost.find('@') + 1
				1900	proxyhost = proxyhost[i:]
				1901	user, passwd = self.get_user_passwd(proxyhost, realm, i)
				1902	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1903	proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1904	quote(passwd, safe=''), proxyhost)
				1905	self.proxies['http'] = 'http://' + proxyhost + proxyselector
				1906	if data is None:
				1907	return self.open(newurl)
				1908	else:
				1909	return self.open(newurl, data)
				1910
				1911	def retry_proxy_https_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1912	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1913	newurl = 'https://' + host + selector
				1914	proxy = self.proxies['https']
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1915	urltype, proxyhost = splittype(proxy)
				1916	proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1917	i = proxyhost.find('@') + 1
				1918	proxyhost = proxyhost[i:]
				1919	user, passwd = self.get_user_passwd(proxyhost, realm, i)
				1920	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1921	proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1922	quote(passwd, safe=''), proxyhost)
				1923	self.proxies['https'] = 'https://' + proxyhost + proxyselector
				1924	if data is None:
				1925	return self.open(newurl)
				1926	else:
				1927	return self.open(newurl, data)
				1928
				1929	def retry_http_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1930	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1931	i = host.find('@') + 1
				1932	host = host[i:]
				1933	user, passwd = self.get_user_passwd(host, realm, i)
				1934	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1935	host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1936	quote(passwd, safe=''), host)
				1937	newurl = 'http://' + host + selector
				1938	if data is None:
				1939	return self.open(newurl)
				1940	else:
				1941	return self.open(newurl, data)
				1942
				1943	def retry_https_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1944	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1945	i = host.find('@') + 1
				1946	host = host[i:]
				1947	user, passwd = self.get_user_passwd(host, realm, i)
				1948	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1949	host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1950	quote(passwd, safe=''), host)
				1951	newurl = 'https://' + host + selector
				1952	if data is None:
				1953	return self.open(newurl)
				1954	else:
				1955	return self.open(newurl, data)
				1956
				1957	def get_user_passwd(self, host, realm, clear_cache = 0):
				1958	key = realm + '@' + host.lower()
				1959	if key in self.auth_cache:
				1960	if clear_cache:
				1961	del self.auth_cache[key]
				1962	else:
				1963	return self.auth_cache[key]
				1964	user, passwd = self.prompt_user_passwd(host, realm)
				1965	if user or passwd: self.auth_cache[key] = (user, passwd)
				1966	return user, passwd
				1967
				1968	def prompt_user_passwd(self, host, realm):
				1969	"""Override this in a GUI environment!"""
				1970	import getpass
				1971	try:
				1972	user = input("Enter username for %s at %s: " % (realm, host))
				1973	passwd = getpass.getpass("Enter password for %s in %s at %s: " %
				1974	(user, realm, host))
				1975	return user, passwd
				1976	except KeyboardInterrupt:
				1977	print()
				1978	return None, None
				1979
				1980
				1981	# Utility functions
				1982
				1983	_localhost = None
				1984	def localhost():
				1985	"""Return the IP address of the magic hostname 'localhost'."""
				1986	global _localhost
				1987	if _localhost is None:
				1988	_localhost = socket.gethostbyname('localhost')
				1989	return _localhost
				1990
				1991	_thishost = None
				1992	def thishost():
				1993	"""Return the IP address of the current host."""
				1994	global _thishost
				1995	if _thishost is None:
				1996	_thishost = socket.gethostbyname(socket.gethostname())
				1997	return _thishost
				1998
				1999	_ftperrors = None
				2000	def ftperrors():
				2001	"""Return the set of errors raised by the FTP class."""
				2002	global _ftperrors
				2003	if _ftperrors is None:
				2004	import ftplib
				2005	_ftperrors = ftplib.all_errors
				2006	return _ftperrors
				2007
				2008	_noheaders = None
				2009	def noheaders():
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2010	"""Return an empty email Message object."""
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2011	global _noheaders
				2012	if _noheaders is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2013	_noheaders = email.message_from_string("")
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2014	return _noheaders
				2015
				2016
				2017	# Utility classes
				2018
				2019	class ftpwrapper:
				2020	"""Class used by open_ftp() for cache of open FTP connections."""
				2021
				2022	def __init__(self, user, passwd, host, port, dirs, timeout=None):
				2023	self.user = user
				2024	self.passwd = passwd
				2025	self.host = host
				2026	self.port = port
				2027	self.dirs = dirs
				2028	self.timeout = timeout
				2029	self.init()
				2030
				2031	def init(self):
				2032	import ftplib
				2033	self.busy = 0
				2034	self.ftp = ftplib.FTP()
				2035	self.ftp.connect(self.host, self.port, self.timeout)
				2036	self.ftp.login(self.user, self.passwd)
				2037	for dir in self.dirs:
				2038	self.ftp.cwd(dir)
				2039
				2040	def retrfile(self, file, type):
				2041	import ftplib
				2042	self.endtransfer()
				2043	if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
				2044	else: cmd = 'TYPE ' + type; isdir = 0
				2045	try:
				2046	self.ftp.voidcmd(cmd)
				2047	except ftplib.all_errors:
				2048	self.init()
				2049	self.ftp.voidcmd(cmd)
				2050	conn = None
				2051	if file and not isdir:
				2052	# Try to retrieve as a file
				2053	try:
				2054	cmd = 'RETR ' + file
				2055	conn = self.ftp.ntransfercmd(cmd)
				2056	except ftplib.error_perm as reason:
				2057	if str(reason)[:3] != '550':
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2058	raise URLError('ftp error', reason).with_traceback(
				2059	sys.exc_info()[2])
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2060	if not conn:
				2061	# Set transfer mode to ASCII!
				2062	self.ftp.voidcmd('TYPE A')
				2063	# Try a directory listing. Verify that directory exists.
				2064	if file:
				2065	pwd = self.ftp.pwd()
				2066	try:
				2067	try:
				2068	self.ftp.cwd(file)
				2069	except ftplib.error_perm as reason:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2070	raise URLError('ftp error', reason) from reason
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2071	finally:
				2072	self.ftp.cwd(pwd)
				2073	cmd = 'LIST ' + file
				2074	else:
				2075	cmd = 'LIST'
				2076	conn = self.ftp.ntransfercmd(cmd)
				2077	self.busy = 1
				2078	# Pass back both a suitably decorated object and a retrieval length
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2079	return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2080	def endtransfer(self):
				2081	if not self.busy:
				2082	return
				2083	self.busy = 0
				2084	try:
				2085	self.ftp.voidresp()
				2086	except ftperrors():
				2087	pass
				2088
				2089	def close(self):
				2090	self.endtransfer()
				2091	try:
				2092	self.ftp.close()
				2093	except ftperrors():
				2094	pass
				2095
				2096	# Proxy handling
				2097	def getproxies_environment():
				2098	"""Return a dictionary of scheme -> proxy server URL mappings.
				2099
				2100	Scan the environment for variables named <scheme>_proxy;
				2101	this seems to be the standard convention. If you need a
				2102	different way, you can pass a proxies dictionary to the
				2103	[Fancy]URLopener constructor.
				2104
				2105	"""
				2106	proxies = {}
				2107	for name, value in os.environ.items():
				2108	name = name.lower()
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2109	if value and name[-6:] == '_proxy':
				2110	proxies[name[:-6]] = value
				2111	return proxies
				2112
				2113	def proxy_bypass_environment(host):
				2114	"""Test if proxies should not be used for a particular host.
				2115
				2116	Checks the environment for a variable named no_proxy, which should
				2117	be a list of DNS suffixes separated by commas, or '*' for all hosts.
				2118	"""
				2119	no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
				2120	# '*' is special case for always bypass
				2121	if no_proxy == '*':
				2122	return 1
				2123	# strip port off host
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2124	hostonly, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2125	# check if the host ends with any of the DNS suffixes
				2126	for name in no_proxy.split(','):
				2127	if name and (hostonly.endswith(name) or host.endswith(name)):
				2128	return 1
				2129	# otherwise, don't bypass
				2130	return 0
				2131
				2132
				2133	if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.  An empty dictionary is returned when the
        ic module is unavailable or cannot be initialized.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}

        found = {}
        # HTTP:
        http_enabled = 'UseHTTPProxy' in config and config['UseHTTPProxy']
        if http_enabled:
            try:
                proxy_host = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                found['http'] = 'http://%s' % proxy_host
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return found
				2163
    def proxy_bypass(host):
        """Return a true value if *host* should be reached without a proxy.

        The no_proxy rules are only consulted when the environment
        defines at least one proxy; otherwise 0 is returned.
        """
        if not getproxies_environment():
            return 0
        return proxy_bypass_environment(host)
				2169
    def getproxies():
        """Return proxies from the environment, else from Internet Config."""
        from_environment = getproxies_environment()
        if from_environment:
            return from_environment
        return getproxies_internetconfig()
				2172
				2173	elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  The ProxyEnable and
        ProxyServer values under the current user's Internet Settings
        key are read.  An empty dictionary is returned when the winreg
        module or the registry key is unavailable.

        """
        proxies = {}
        try:
            # The module is named winreg in Python 3 (formerly _winreg).
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re

        settings_path = (r'Software\Microsoft\Windows'
                         r'\CurrentVersion\Internet Settings')
        try:
            internet_settings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                               settings_path)
        except OSError:
            # Registry key not found: nothing to report.
            return proxies
        try:
            proxy_enable = winreg.QueryValueEx(internet_settings,
                                               'ProxyEnable')[0]
            if proxy_enable:
                # Returned as Unicode but problems if not converted to ASCII
                proxy_server = str(winreg.QueryValueEx(internet_settings,
                                                       'ProxyServer')[0])
                if '=' in proxy_server:
                    # Per-protocol settings
                    for entry in proxy_server.split(';'):
                        protocol, address = entry.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxy_server[:5] == 'http:':
                        proxies['http'] = proxy_server
                    else:
                        proxies['http'] = 'http://%s' % proxy_server
                        proxies['ftp'] = 'ftp://%s' % proxy_server
        except (OSError, ValueError, TypeError):
            # Either a registry value was not found, or a value is in an
            # unexpected format; return whatever was gathered so far.
            pass
        finally:
            # Release the key handle even when reading failed.
            internet_settings.Close()
        return proxies
				2218
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Settings found in the environment take precedence; the registry
        is consulted only when the environment defines no proxies.

        """
        from_environment = getproxies_environment()
        if from_environment:
            return from_environment
        return getproxies_registry()
				2227
    def proxy_bypass_registry(host):
        """Return 1 if the registry says *host* should bypass the proxy, else 0.

        The ProxyOverride value is a ';'-separated list of glob patterns
        ('*' and '?'), where '<local>' stands for this machine.  The
        host name, its IP address and its fully qualified name are each
        matched against every pattern, ignoring case.  0 is returned
        when winreg or the registry values are unavailable, or when
        proxying is disabled.
        """
        try:
            # The module is named winreg in Python 3 (formerly _winreg).
            import winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        settings_path = (r'Software\Microsoft\Windows'
                         r'\CurrentVersion\Internet Settings')
        try:
            internet_settings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                               settings_path)
            try:
                proxy_enable = winreg.QueryValueEx(internet_settings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxy_override = str(winreg.QueryValueEx(internet_settings,
                                                         'ProxyOverride')[0])
            finally:
                # Release the key handle even when a value is missing.
                internet_settings.Close()
        except OSError:
            return 0
        if not proxy_enable or not proxy_override:
            return 0
        # try to make a host list from name and IP address.
        raw_host, port = splitport(host)
        candidates = [raw_host]
        try:
            addr = socket.gethostbyname(raw_host)
            if addr != raw_host:
                candidates.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(raw_host)
            if fqdn != raw_host:
                candidates.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        patterns = []
        for entry in proxy_override.split(';'):
            if entry == '<local>':
                patterns.extend(['localhost',
                                 '127.0.0.1',
                                 socket.gethostname(),
                                 socket.gethostbyname(
                                     socket.gethostname())])
            elif entry:
                # Empty entries (e.g. from a trailing ';') are skipped:
                # an empty pattern would match every host.
                patterns.append(entry)
        # now check if we match one of the registry values.
        for pattern in patterns:
            pattern = pattern.replace(".", r"\.")     # mask dots
            pattern = pattern.replace("*", r".*")     # change glob sequence
            pattern = pattern.replace("?", r".")      # change glob char
            for name in candidates:
                if re.match(pattern, name, re.I):
                    return 1
        return 0
				2286
    def proxy_bypass(host):
        """Return a true value if *host* should be reached without a proxy.

        The no_proxy rules from the environment are used when the
        environment defines at least one proxy; otherwise the registry
        settings decide.

        """
        if getproxies_environment():
            decide = proxy_bypass_environment
        else:
            decide = proxy_bypass_registry
        return decide(host)
				2298
else:
    # By default use environment variables: on platforms other than the
    # two handled above, both public names are plain aliases of the
    # environment-based implementations.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment