blob: c789ffce63b9a2cb103d1c24b8b024aae5b19cb2 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001# Issues in merging urllib and urllib2:
2# 1. They both define a function named urlopen()
3
4"""An extensible library for opening URLs using a variety of protocols
5
6The simplest way to use this module is to call the urlopen function,
7which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
10
11The OpenerDirector manages a collection of Handler objects that do
12all the actual work. Each Handler implements a particular protocol or
13option. The OpenerDirector is a composite object that invokes the
14Handlers needed to open the requested URL. For example, the
15HTTPHandler performs HTTP GET and POST requests and deals with
16non-error returns. The HTTPRedirectHandler automatically deals with
17HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
18deals with digest authentication.
19
20urlopen(url, data=None) -- Basic usage is the same as original
21urllib. pass the url and optionally data to post to an HTTP URL, and
22get a file-like object back. One difference is that you can also pass
23a Request instance instead of URL. Raises a URLError (subclass of
24IOError); for HTTP errors, raises an HTTPError, which can also be
25treated as a valid response.
26
27build_opener -- Function that creates a new OpenerDirector instance.
28Will install the default handlers. Accepts one or more Handlers as
29arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
32
33install_opener -- Installs a new opener as the default opener.
34
35objects of interest:
36OpenerDirector --
37
38Request -- An object that encapsulates the state of a request. The
39state can be as simple as the URL. It can also include extra HTTP
40headers, e.g. a User-Agent.
41
42BaseHandler --
43
44internals:
45BaseHandler and parent
46_call_chain conventions
47
48Example usage:
49
Georg Brandl029986a2008-06-23 11:44:14 +000050import urllib.request
Jeremy Hylton1afc1692008-06-18 20:49:58 +000051
52# set up authentication info
Georg Brandl029986a2008-06-23 11:44:14 +000053authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000054authinfo.add_password(realm='PDQ Application',
55 uri='https://mahler:8092/site-updates.py',
56 user='klem',
57 passwd='geheim$parole')
58
Georg Brandl029986a2008-06-23 11:44:14 +000059proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton1afc1692008-06-18 20:49:58 +000060
61# build a new opener that adds authentication and caching FTP handlers
Georg Brandl029986a2008-06-23 11:44:14 +000062opener = urllib.request.build_opener(proxy_support, authinfo,
63 urllib.request.CacheFTPHandler)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000064
65# install it
Georg Brandl029986a2008-06-23 11:44:14 +000066urllib.request.install_opener(opener)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000067
Georg Brandl029986a2008-06-23 11:44:14 +000068f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton1afc1692008-06-18 20:49:58 +000069"""
70
71# XXX issues:
# If an authentication error handler tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
78# ftp errors aren't handled cleanly
79# check digest against correct (i.e. non-apache) implementation
80
81# Possible extensions:
82# complex proxies XXX not sure what exactly was meant by this
83# abstract factory for opener
84
85import base64
86import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
92import random
93import re
94import socket
95import sys
96import time
Jeremy Hylton1afc1692008-06-18 20:49:58 +000097import bisect
98
Georg Brandl13e89462008-07-01 19:56:00 +000099from urllib.error import URLError, HTTPError, ContentTooShortError
100from urllib.parse import (
101 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
102 splittype, splithost, splitport, splituser, splitpasswd,
Facundo Batistaf24802c2008-08-17 03:36:03 +0000103 splitattr, splitquery, splitvalue, to_bytes, urlunparse)
Georg Brandl13e89462008-07-01 19:56:00 +0000104from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000105
106# check for SSL
107try:
108 import ssl
109except:
110 _have_ssl = False
111else:
112 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113
114# used in User-Agent header sent
115__version__ = sys.version[:3]
116
# Module-wide opener, created lazily by urlopen() or set by install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Open *url* (a URL string or a Request object) and return a
    file-like response object.

    Uses the installed module-wide opener, building a default one on
    first use.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data, timeout)
123
def install_opener(opener):
    """Install *opener* as the module-wide opener used by urlopen()."""
    global _opener
    _opener = opener
127
# TODO(jhylton): Make this work with the same global opener.
_urlopener = None  # lazily created FancyURLopener shared by urlretrieve()
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* via the legacy FancyURLopener interface.

    filename, reporthook and data are forwarded unchanged to
    FancyURLopener.retrieve (presumably returning (filename, headers) as
    in the classic urllib API -- confirm against FancyURLopener).
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
135
def urlcleanup():
    """Clean up temporary files left by urlretrieve() and drop the
    installed module-wide opener, if any."""
    global _opener
    if _urlopener:
        _urlopener.cleanup()
    # Resetting unconditionally is equivalent: _opener is either an
    # opener instance (truthy) or already None.
    _opener = None
142
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    Variation from the RFC: the returned value is lowercased, for
    convenient comparison.
    """
    host = urlparse(request.get_full_url())[1]
    if not host:
        host = request.get_header("Host", "")
    # Strip a trailing :port, if present, and normalize case.
    return _cut_port_re.sub("", host, 1).lower()
160
class Request:
    """Encapsulate the state of a single URL request.

    The state can be as simple as the URL itself; a Request may also
    carry POST data, extra HTTP headers, and the origin/unverifiable
    flags used by cookie processing (RFC 2965).
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        """Create a request for *url*.

        url -- the URL to open; may be wrapped as '<URL:type://host/path>'
        data -- optional POST data; when set, get_method() returns "POST"
        headers -- optional mapping of initial headers.  The default is
            None rather than the previous mutable ``{}`` literal, so a
            shared default dict can never leak between instances.
        origin_req_host -- request-host of the origin transaction
            (defaults to the host of this request's URL)
        unverifiable -- True if the user had no option to approve the
            request (RFC 2965)
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        if headers:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # Trigger the lazy computation, then re-read the
                # (now set) mangled attribute.
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        """Return the HTTP method: "POST" when data is set, else "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Replace the request's POST data."""
        self.data = data

    def has_data(self):
        """Return True if POST data has been set."""
        return self.data is not None

    def get_data(self):
        """Return the POST data (None if unset)."""
        return self.data

    def get_full_url(self):
        """Return the original (unwrapped) URL string."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, computing and caching it on first use.

        Raises ValueError if the URL has no recognizable scheme.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the (unquoted) host, computing it on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return the selector (the URL portion sent to the server)."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route this request through proxy *host* using scheme *type*."""
        self.host, self.type = host, type
        # When proxying, the full original URL becomes the selector.
        self.__r_host = self.__original

    def has_proxy(self):
        """Return True once set_proxy() has been called."""
        return self.__r_host == self.__original

    def get_origin_req_host(self):
        """Return the request-host of the origin transaction."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the RFC 2965 'unverifiable' flag."""
        return self.unverifiable

    def add_header(self, key, val):
        """Set a header (replacing any existing value for the key).

        Useful for something like authentication.  Keys are normalized
        with str.capitalize().
        """
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if the header is set (redirected or not)."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value, checking normal headers first."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs; normal
        headers shadow unredirected ones with the same name."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
264
class OpenerDirector:
    """Manage a chain of BaseHandler objects that cooperate to open URLs.

    Handlers are registered with add_handler(); open() routes a request
    through the registered request pre-processors, protocol openers and
    response post-processors, while error() dispatches to the
    registered error handlers.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        # Headers added to every request; callers may extend this list.
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []          # all handlers, kept sorted (see __lt__)
        self.handle_open = {}       # scheme -> [handlers with <scheme>_open]
        self.handle_error = {}      # scheme -> {code -> [error handlers]}
        self.process_response = {}  # scheme -> [response post-processors]
        self.process_request = {}   # scheme -> [request pre-processors]

    def add_handler(self, handler):
        """Register *handler* under every protocol/condition advertised
        by its method names (e.g. http_open, http_error_404,
        http_request, http_response).
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match: these look like
                # <protocol>_<condition> names but are plain methods.
                continue

            # Split "<protocol>_<condition...>" on the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "http_error_404" -> kind 404; "http_error" -> kind "".
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each per-kind list sorted by handler_order.
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in a specific order, the order
            # is specified in a Handler attribute (handler_order)
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler registered in chain[kind]
        until one returns a non-None result; return that result (or
        None when every handler declines).
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the
        response, applying request pre- and response post-processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Open *req*: try default_open, then <scheme>_open, then
        unknown_open, returning the first non-None result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For http/https, *args* is (request, response, code, msg, hdrs)
        and dispatch is by numeric status code, falling back to
        http_error_default.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK! (the status code becomes the "proto")
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
403
404# XXX probably also want an abstract factory that knows when it makes
405# sense to skip a superclass in favor of a subclass and when it might
406# make sense to include both
407
def build_opener(*handlers):
    """Create an OpenerDirector from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If any of the handlers passed as arguments are
    subclasses (or instances of subclasses) of a default handler, the
    corresponding default is omitted.  Handler classes are
    instantiated; instances are used as given.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Drop every default that the caller overrides with a subclass
    # (or an instance of a subclass).
    skip = {klass
            for klass in default_classes
            for check in handlers
            if (issubclass(check, klass) if isclass(check)
                else isinstance(check, klass))}
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for handler in handlers:
        opener.add_handler(handler() if isclass(handler) else handler)
    return opener
445
class BaseHandler:
    """Common base for handlers managed by an OpenerDirector.

    ``handler_order`` determines the handler's position in the chain;
    lower values run earlier.
    """

    handler_order = 500

    def add_parent(self, parent):
        # Called by OpenerDirector.add_handler so the handler can reach
        # back into the director (e.g. to re-open a rewritten request).
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of sorting custom classes that
            # are unaware of handler_order after the default ones.
            return True
        return self.handler_order < other_order
463
464
class HTTPErrorProcessor(BaseHandler):
    """Route non-2xx HTTP responses into the opener's error chain."""

    # Runs after all other response processing.
    handler_order = 1000

    def http_response(self, request, response):
        """Return 2xx responses unchanged; hand anything else to
        parent.error(), which may raise HTTPError or substitute a
        response."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, a "2xx" code indicates that the
        # client's request was successfully received, understood, and
        # accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
481
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: raise HTTPError for any HTTP error status
    no other handler has dealt with."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000485
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Redirect GET/HEAD for any of the 30x codes, and POST only for
        # 301-303; everything else is treated as an error.
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # Do not propagate body-describing headers: the redirected
        # request is issued without the original POST body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect: compute the target URL, delegate to
        redirect_request(), guard against loops, and re-open."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL (empty path -> "/")
        urlparts = urlparse(newurl)
        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # Resolve a relative redirect target against the original URL.
        newurl = urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
578
579
580def _parse_proxy(proxy):
581 """Return (scheme, user, password, host/port) given a URL or an authority.
582
583 If a URL is supplied, it must have an authority (host:port) component.
584 According to RFC 3986, having an authority component means the URL must
585 have two slashes after the scheme:
586
587 >>> _parse_proxy('file:/ftp.example.com/')
588 Traceback (most recent call last):
589 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
590
591 The first three items of the returned tuple may be None.
592
593 Examples of authority parsing:
594
595 >>> _parse_proxy('proxy.example.com')
596 (None, None, None, 'proxy.example.com')
597 >>> _parse_proxy('proxy.example.com:3128')
598 (None, None, None, 'proxy.example.com:3128')
599
600 The authority component may optionally include userinfo (assumed to be
601 username:password):
602
603 >>> _parse_proxy('joe:password@proxy.example.com')
604 (None, 'joe', 'password', 'proxy.example.com')
605 >>> _parse_proxy('joe:password@proxy.example.com:3128')
606 (None, 'joe', 'password', 'proxy.example.com:3128')
607
608 Same examples, but with URLs instead:
609
610 >>> _parse_proxy('http://proxy.example.com/')
611 ('http', None, None, 'proxy.example.com')
612 >>> _parse_proxy('http://proxy.example.com:3128/')
613 ('http', None, None, 'proxy.example.com:3128')
614 >>> _parse_proxy('http://joe:password@proxy.example.com/')
615 ('http', 'joe', 'password', 'proxy.example.com')
616 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
617 ('http', 'joe', 'password', 'proxy.example.com:3128')
618
619 Everything after the authority is ignored:
620
621 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
622 ('ftp', 'joe', 'password', 'proxy.example.com')
623
624 Test for no trailing '/' case:
625
626 >>> _parse_proxy('http://joe:password@proxy.example.com')
627 ('http', 'joe', 'password', 'proxy.example.com')
628
629 """
Georg Brandl13e89462008-07-01 19:56:00 +0000630 scheme, r_scheme = splittype(proxy)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000631 if not r_scheme.startswith("/"):
632 # authority
633 scheme = None
634 authority = proxy
635 else:
636 # URL
637 if not r_scheme.startswith("//"):
638 raise ValueError("proxy URL with no authority: %r" % proxy)
639 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
640 # and 3.3.), path is empty or starts with '/'
641 end = r_scheme.find("/", 2)
642 if end == -1:
643 end = None
644 authority = r_scheme[2:end]
Georg Brandl13e89462008-07-01 19:56:00 +0000645 userinfo, hostport = splituser(authority)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000646 if userinfo is not None:
Georg Brandl13e89462008-07-01 19:56:00 +0000647 user, password = splitpasswd(userinfo)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000648 else:
649 user = password = None
650 return scheme, user, password, hostport
651
class ProxyHandler(BaseHandler):
    """Rewrite requests to go through a proxy, per a scheme->proxy map."""

    # Proxies must be in front of the rest of the handler chain.
    handler_order = 100

    def __init__(self, proxies=None):
        # proxies maps a URL scheme to a proxy URL or authority, e.g.
        # {'http': 'http://proxy.example.com:3128'}; defaults come from
        # the environment via getproxies().
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Synthesize a <scheme>_open method for each configured scheme.
        # The lambda's default arguments bind the *current* url/type/
        # bound-method so every generated method keeps its own values.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; add Proxy-authorization if the proxy
        URL carries credentials.

        Returns None (letting other handlers proceed) when the proxy
        speaks the request's own scheme, otherwise re-opens the
        rewritten request from scratch.
        """
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)
689
class HTTPPasswordMgr:
    """Map (realm, URI prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # {realm: {tuple_of_reduced_uris: (user, password)}}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of
        URIs."""
        if isinstance(uri, str):
            uri = [uri]
        domain = self.passwd.setdefault(realm, {})
        # Store the URIs both with and without the scheme's default
        # port so a later lookup matches either spelling.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            domain[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for *realm*/*authuri*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(uri, reduced) for uri in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # full URI with an authority component
            scheme, authority = parts[0], parts[1]
            path = parts[2] or '/'
        else:
            # bare host or host:port
            scheme, authority, path = None, uri, '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Canonicalize by appending the scheme's default port.
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """True if *test* lies at or below *base* in the URI tree.

        Both arguments must be in the reduced (authority, path) form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
752
753
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that treats a realm of None as a wildcard."""

    def find_user_password(self, realm, authuri):
        # Try the specific realm first, then fall back to the
        # catch-all None realm.
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
762
763
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic authentication handlers.

    Concrete subclasses provide ``auth_header`` and call
    http_error_auth_reqed() from their 401/407 handlers.
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the manager's add_password directly on the handler.
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Parse the challenge header named *authreq* and, for a Basic
        challenge, retry the request with credentials.

        host may be an authority (without userinfo) or a URL with an
        authority.  XXX there could be multiple challenge headers; only
        the first is consulted.
        """
        challenge = headers.get(authreq, None)
        if not challenge:
            return None
        mo = AbstractBasicAuthHandler.rx.search(challenge)
        if mo is None:
            return None
        scheme, quote, realm = mo.groups()
        if scheme.lower() != 'basic':
            return None
        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with Basic credentials for *realm*, or return
        None when no password is known or the same credentials already
        failed."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
        if req.headers.get(self.auth_header, None) == auth:
            # These exact credentials were already rejected; give up
            # instead of looping.
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
807
808
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Retry 401 responses with Basic credentials for the request URL."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full URL serves as the "host" key for the password lookup.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.get_full_url(), req, headers)
817
818
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Retry 407 responses with Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo
        # component in authority.  Assume there isn't one, since
        # urllib.request does not (and should not, RFC 3986 s. 3.2.1)
        # support requests for URLs containing userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.get_host(), req, headers)
831
832
def randombytes(n):
    """Return *n* random bytes drawn from the OS entropy source."""
    return os.urandom(n)
836
837class AbstractDigestAuthHandler:
838 # Digest authentication is specified in RFC 2617.
839
840 # XXX The client does not inspect the Authentication-Info header
841 # in a successful response.
842
843 # XXX It should be possible to test this implementation against
844 # a mock server that just generates a static set of challenges.
845
    # XXX qop="auth-int" support is shaky
847
    def __init__(self, passwd=None):
        """Create the handler; *passwd* defaults to a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        # Expose the manager's add_password directly on the handler.
        self.add_password = self.passwd.add_password
        # Number of retries performed after a challenge (capped at 5).
        self.retried = 0
        # nc (nonce count) value sent in qop="auth" responses.
        self.nonce_count = 0
855
    def reset_retry_count(self):
        # Called by the concrete handlers so that a new request starts
        # with a fresh retry budget.
        self.retried = 0
858
    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Handle a 401/407 challenge found in *auth_header*; retry
        with Digest credentials, raising HTTPError after more than five
        consecutive retries."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time.  Hm.  Unless the Password Manager is
            # prompting for the information.  Crap.  This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            # Only the "Digest" scheme is handled here.
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
875
    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with an Authorization header computed from
        the server's Digest challenge *auth*; return the new response
        (or None when the same credentials already failed)."""
        token, challenge = auth.split(' ', 1)
        # Parse the comma-separated key="value" challenge parameters.
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # The identical header was already rejected; avoid looping.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp
887
888 def get_cnonce(self, nonce):
889 # The cnonce-value is an opaque
890 # quoted string value provided by the client and used by both client
891 # and server to avoid chosen plaintext attacks, to provide mutual
892 # authentication, and to provide some message integrity protection.
893 # This isn't a fabulous effort, but it's probably Good Enough.
894 s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
895 b = s.encode("ascii") + randombytes(8)
896 dig = hashlib.sha1(b).hexdigest()
897 return dig[:16]
898
    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value for req.

        chal is the parsed challenge dict.  Returns the header value
        (without the 'Digest ' prefix), or None when the challenge is
        unusable, the algorithm is unsupported, or no password is known.
        Raises URLError for qop values other than 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        # H hashes a string; KD(secret, data) == H(secret ':' data).
        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            # ncvalue/cnonce are also used when assembling `base` below;
            # they only exist on this branch (qop truthy implies 'auth'
            # here, since other values raise).
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base
954
955 def get_algorithm_impls(self, algorithm):
956 # lambdas assume digest modules are imported at the top level
957 if algorithm == 'MD5':
958 H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
959 elif algorithm == 'SHA':
960 H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
961 # XXX MD5-sess
962 KD = lambda s, d: H("%s:%s" % (s, d))
963 return H, KD
964
    def get_entity_digest(self, data, chal):
        # XXX not implemented yet -- would supply the entity-body digest
        # used for the "digest" parameter in get_authorization().
        # Returning None makes that parameter be omitted entirely.
        return None
968
969
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication for ordinary (non-proxy) 401 responses.

    An authentication protocol defined by RFC 2069: digest
    authentication improves on basic authentication because it does
    not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # netloc component of the request URL is the host to authenticate.
        host = urlparse(req.get_full_url())[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        self.reset_retry_count()
        return response
986
987
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication for proxy 407 responses."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        proxy_host = req.get_host()
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              proxy_host, req, headers)
        self.reset_retry_count()
        return response
998 return retry
999
class AbstractHTTPHandler(BaseHandler):
    # Shared plumbing for HTTPHandler and HTTPSHandler: request header
    # fix-up in do_request_() and the actual network exchange in do_open().

    def __init__(self, debuglevel=0):
        # Debug level knob; adjustable later via set_http_debuglevel().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        # Pre-flight fix-up: fill in the headers every request needs.
        # Installed as http_request/https_request in the subclasses;
        # returns the (mutated) request object.
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data(): # POST
            data = request.get_data()
            # Default Content-type and Content-length for POST bodies,
            # unless the caller already supplied them.
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        # For proxied requests the selector is a full URL, so the Host
        # header must name the origin server extracted from it, not the
        # proxy itself.
        sel_host = host
        if request.has_proxy():
            scheme, sel = splittype(request.get_selector())
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        # Opener-wide extra headers (e.g. User-Agent) never override
        # headers already present on the request.
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a email Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host, timeout=req.timeout) # will parse host:port
        # Combine ordinary and unredirected headers; the latter win.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header capitalization (e.g. 'content-type' -> 'Content-Type').
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise URLError(err)

        ## resp = addinfourl(r.fp, r.msg, req.get_full_url())
        # Wrap the HTTPResponse object itself (not r.fp) so the response
        # body and metadata travel together.
        resp = addinfourl(r, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
1077
1078
class HTTPHandler(AbstractHTTPHandler):
    # Concrete handler for http:// URLs.

    def http_open(self, req):
        return self.do_open(http.client.HTTPConnection, req)

    # Reuse the shared header fix-up as the http_request pre-processor.
    http_request = AbstractHTTPHandler.do_request_
1085
# HTTPSHandler only exists when the interpreter was built with SSL support.
if hasattr(http.client, 'HTTPSConnection'):
    class HTTPSHandler(AbstractHTTPHandler):

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_
1093
class HTTPCookieProcessor(BaseHandler):
    """Attach stored cookies to requests and harvest new ones from replies."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        # Add any applicable Cookie header before the request goes out.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record Set-Cookie headers from the response into the jar.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1111
class UnknownHandler(BaseHandler):
    """Fallback handler: any scheme no other handler claims is an error."""

    def unknown_open(self, req):
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001116
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Surrounding double quotes are stripped from values.  Returns a dict.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Guard against an empty value ('k='): v[0] would raise IndexError.
        if v and v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1126
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    buf = ''
    in_escape = False
    in_quotes = False

    for ch in s:
        if in_escape:
            # Previous char was a backslash inside quotes: take this
            # char literally (the backslash itself is dropped).
            buf += ch
            in_escape = False
        elif in_quotes:
            if ch == '\\':
                in_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                buf += ch
        elif ch == ',':
            # Top-level comma ends the current element (possibly empty).
            items.append(buf)
            buf = ''
        else:
            if ch == '"':
                in_quotes = True
            buf += ch

    # Flush the trailing element, if any.
    if buf:
        items.append(buf)

    return [item.strip() for item in items]
1169
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        # 'file://host/...' with a non-empty host is retried as FTP;
        # 'file:///path' is a genuinely local file.
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Lazily computed, cached on the class: IP addresses that count
        # as "this machine" for file:// host checks.
        if FileHandler.names is None:
            try:
                FileHandler.names = (socket.gethostbyname('localhost'),
                                     socket.gethostbyname(socket.gethostname()))
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        import email.utils
        import mimetypes
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(file)[0]
            # Synthesize HTTP-style headers for the local file.
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Only serve the file when the URL names no host, or names
            # this machine (by resolved address) without a port.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                return addinfourl(open(localfile, 'rb'), headers, 'file:'+file)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001215
1216def _safe_gethostbyname(host):
1217 try:
1218 return socket.gethostbyname(host)
1219 except socket.gaierror:
1220 return None
1221
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        # Fetch an ftp:// URL and return an addinfourl wrapping the
        # data connection; FTP failures are re-raised as URLError.
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        # Split off ';type=...' style attributes, then the directory path.
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Transfer type: binary ('I') for files, directory listing ('D')
            # otherwise, unless a ';type=' attribute overrides it below.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            # Synthesize HTTP-style headers for the FTP payload.
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            exc = URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # Hook point: CacheFTPHandler overrides this to reuse connections.
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
        return fw
1279
class CacheFTPHandler(FTPHandler):
    """FTPHandler that reuses live connections, keyed by user/host/port/path.

    Entries expire self.delay seconds after last use, and the cache is
    capped at self.max_conns entries.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}    # key -> live ftpwrapper
        self.timeout = {}  # key -> absolute expiry time
        self.soonest = 0   # earliest expiry among cached entries
        self.delay = 60    # idle lifetime in seconds
        self.max_conns = 16

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # The sweep above may have emptied the cache; min() over an
            # empty sequence raises ValueError, so guard it.
            if self.timeout:
                self.soonest = min(self.timeout.values())
            else:
                self.soonest = 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            if self.timeout:
                self.soonest = min(self.timeout.values())
            else:
                self.soonest = 0
1326
# Code move from the old urllib module

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

# Helper for non-unix systems
# NOTE(review): os.name == 'mac' is a legacy value -- confirm it can still
# occur on this Python before relying on the macurl2path branch.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001346
1347# This really consists of two pieces:
1348# (1) a class which handles opening of all sorts of URLs
1349# (plus assorted utilities etc.)
1350# (2) a set of functions for parsing URLs
1351# XXX Should these be separated out into different modules?
1352
1353
ftpcache = {}  # module-level FTP connection cache shared by URLopener instances
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Names of temp files created by retrieve(); set per-instance in
    # __init__ and removed again in cleanup().
    __tempfiles = None

    # Default User-Agent string sent by this opener.
    version = "Python-urllib/%s" % __version__
1366
1367 # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up the opener.

        proxies maps scheme -> proxy URL and defaults to the
        environment's settings via getproxies(); x509 may carry
        key_file/cert_file for HTTPS client authentication.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
1390
    def __del__(self):
        # Best-effort temp-file cleanup when the opener is garbage collected.
        self.close()
1393
    def close(self):
        # Public entry point for cleanup(): removes temp files and
        # clears the tempcache.
        self.cleanup()
1396
    def cleanup(self):
        # Remove temp files created by retrieve() and clear the tempcache.
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # self.__unlink is os.unlink, captured in __init__
                    # precisely because os may be gone at shutdown.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
1410
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # args is the (name, value) tuple itself.
        self.addheaders.append(args)
1415
1416 # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        # Serve from the (optional) retrieve() cache when enabled.
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            # Route through the configured proxy for this scheme.
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to the scheme-specific open_<type>() method.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
1449
1450 def open_unknown(self, fullurl, data=None):
1451 """Overridable interface to open unknown URL type."""
Georg Brandl13e89462008-07-01 19:56:00 +00001452 type, url = splittype(fullurl)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001453 raise IOError('url error', 'unknown url type', type)
1454
1455 def open_unknown_proxy(self, proxy, fullurl, data=None):
1456 """Overridable interface to open unknown URL type."""
Georg Brandl13e89462008-07-01 19:56:00 +00001457 type, url = splittype(fullurl)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001458 raise IOError('url error', 'invalid proxy for %s' % type, proxy)
1459
1460 # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum,
        blocksize, totalsize) during the download.  data is an optional
        POST payload.  Raises ContentTooShortError when fewer bytes
        arrive than Content-Length promised.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # A local file needs no copy: hand back its own path directly.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: download into a temp file whose suffix
                # mirrors the URL path's extension.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if reporthook:
                    if "content-length" in headers:
                        size = int(headers["Content-Length"])
                    reporthook(blocknum, bs, size)
                # Copy in 8 KiB blocks, reporting progress as we go.
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
1526
1527 # Each method named open_<type> knows how to open that type of URL
1528
1529 def _open_generic_http(self, connection_factory, url, data):
1530 """Make an HTTP connection using connection_class.
1531
1532 This is an internal method that should be called from
1533 open_http() or open_https().
1534
1535 Arguments:
1536 - connection_factory should take a host name and return an
1537 HTTPConnection instance.
1538 - url is the url to retrieval or a host, relative-path pair.
1539 - data is payload for a POST request or None.
1540 """
1541
1542 user_passwd = None
1543 proxy_passwd= None
1544 if isinstance(url, str):
Georg Brandl13e89462008-07-01 19:56:00 +00001545 host, selector = splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001546 if host:
Georg Brandl13e89462008-07-01 19:56:00 +00001547 user_passwd, host = splituser(host)
1548 host = unquote(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001549 realhost = host
1550 else:
1551 host, selector = url
1552 # check whether the proxy contains authorization information
Georg Brandl13e89462008-07-01 19:56:00 +00001553 proxy_passwd, host = splituser(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001554 # now we proceed with the url we want to obtain
Georg Brandl13e89462008-07-01 19:56:00 +00001555 urltype, rest = splittype(selector)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001556 url = rest
1557 user_passwd = None
1558 if urltype.lower() != 'http':
1559 realhost = None
1560 else:
Georg Brandl13e89462008-07-01 19:56:00 +00001561 realhost, rest = splithost(rest)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001562 if realhost:
Georg Brandl13e89462008-07-01 19:56:00 +00001563 user_passwd, realhost = splituser(realhost)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001564 if user_passwd:
1565 selector = "%s://%s%s" % (urltype, realhost, rest)
1566 if proxy_bypass(realhost):
1567 host = realhost
1568
1569 #print "proxy via http:", host, selector
1570 if not host: raise IOError('http error', 'no host given')
1571
1572 if proxy_passwd:
1573 import base64
1574 proxy_auth = base64.b64encode(proxy_passwd).strip()
1575 else:
1576 proxy_auth = None
1577
1578 if user_passwd:
1579 import base64
1580 auth = base64.b64encode(user_passwd).strip()
1581 else:
1582 auth = None
1583 http_conn = connection_factory(host)
1584 # XXX We should fix urllib so that it works with HTTP/1.1.
1585 http_conn._http_vsn = 10
1586 http_conn._http_vsn_str = "HTTP/1.0"
1587
1588 headers = {}
1589 if proxy_auth:
1590 headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
1591 if auth:
1592 headers["Authorization"] = "Basic %s" % auth
1593 if realhost:
1594 headers["Host"] = realhost
1595 for header, value in self.addheaders:
1596 headers[header] = value
1597
1598 if data is not None:
1599 headers["Content-Type"] = "application/x-www-form-urlencoded"
1600 http_conn.request("POST", selector, data, headers)
1601 else:
1602 http_conn.request("GET", selector, headers=headers)
1603
1604 try:
1605 response = http_conn.getresponse()
1606 except http.client.BadStatusLine:
1607 # something went wrong with the HTTP status line
Georg Brandl13e89462008-07-01 19:56:00 +00001608 raise URLError("http protocol error: bad status line")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001609
1610 # According to RFC 2616, "2xx" code indicates that the client's
1611 # request was successfully received, understood, and accepted.
1612 if 200 <= response.status < 300:
Antoine Pitroub353c122009-02-11 00:39:14 +00001613 return addinfourl(response, response.msg, "http:" + url,
Georg Brandl13e89462008-07-01 19:56:00 +00001614 response.status)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001615 else:
1616 return self.http_error(
1617 url, response.fp,
1618 response.status, response.reason, response.msg, data)
1619
    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        # Thin wrapper: all the work happens in _open_generic_http().
        return self._open_generic_http(http.client.HTTPConnection, url, data)
1623
1624 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1625 """Handle http errors.
1626
1627 Derived class can override this, or provide specific handlers
1628 named http_error_DDD where DDD is the 3-digit error code."""
1629 # First check if there's a specific handler for this error
1630 name = 'http_error_%d' % errcode
1631 if hasattr(self, name):
1632 method = getattr(self, name)
1633 if data is None:
1634 result = method(url, fp, errcode, errmsg, headers)
1635 else:
1636 result = method(url, fp, errcode, errmsg, headers, data)
1637 if result: return result
1638 return self.http_error_default(url, fp, errcode, errmsg, headers)
1639
1640 def http_error_default(self, url, fp, errcode, errmsg, headers):
1641 """Default error handler: close the connection and raise IOError."""
1642 void = fp.read()
1643 fp.close()
Georg Brandl13e89462008-07-01 19:56:00 +00001644 raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001645
    # HTTPS support only exists when the interpreter has SSL.
    if _have_ssl:
        def _https_connection(self, host):
            # Connection factory handed to _open_generic_http(); carries
            # the client certificate configured in __init__.
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)
1655
1656 def open_file(self, url):
1657 """Use local file or FTP depending on form of URL."""
1658 if not isinstance(url, str):
1659 raise URLError('file error', 'proxy support for file protocol currently not implemented')
1660 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
1661 return self.open_ftp(url)
1662 else:
1663 return self.open_local_file(url)
1664
    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, email.utils
        from io import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize HTTP-style headers describing the local file.
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A named host is acceptable only when portless and resolving to
        # this machine.
        if (not port
           and socket.gethostbyname(host) in (localhost(), thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error', 'not on local host')
1694
1695 def open_ftp(self, url):
1696 """Use FTP protocol."""
1697 if not isinstance(url, str):
1698 raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
1699 import mimetypes
1700 from io import StringIO
Georg Brandl13e89462008-07-01 19:56:00 +00001701 host, path = splithost(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001702 if not host: raise URLError('ftp error', 'no host given')
Georg Brandl13e89462008-07-01 19:56:00 +00001703 host, port = splitport(host)
1704 user, host = splituser(host)
1705 if user: user, passwd = splitpasswd(user)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001706 else: passwd = None
Georg Brandl13e89462008-07-01 19:56:00 +00001707 host = unquote(host)
1708 user = unquote(user or '')
1709 passwd = unquote(passwd or '')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001710 host = socket.gethostbyname(host)
1711 if not port:
1712 import ftplib
1713 port = ftplib.FTP_PORT
1714 else:
1715 port = int(port)
Georg Brandl13e89462008-07-01 19:56:00 +00001716 path, attrs = splitattr(path)
1717 path = unquote(path)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001718 dirs = path.split('/')
1719 dirs, file = dirs[:-1], dirs[-1]
1720 if dirs and not dirs[0]: dirs = dirs[1:]
1721 if dirs and not dirs[0]: dirs[0] = '/'
1722 key = user, host, port, '/'.join(dirs)
1723 # XXX thread unsafe!
1724 if len(self.ftpcache) > MAXFTPCACHE:
1725 # Prune the cache, rather arbitrarily
1726 for k in self.ftpcache.keys():
1727 if k != key:
1728 v = self.ftpcache[k]
1729 del self.ftpcache[k]
1730 v.close()
1731 try:
1732 if not key in self.ftpcache:
1733 self.ftpcache[key] = \
1734 ftpwrapper(user, passwd, host, port, dirs)
1735 if not file: type = 'D'
1736 else: type = 'I'
1737 for attr in attrs:
Georg Brandl13e89462008-07-01 19:56:00 +00001738 attr, value = splitvalue(attr)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001739 if attr.lower() == 'type' and \
1740 value in ('a', 'A', 'i', 'I', 'd', 'D'):
1741 type = value.upper()
1742 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
1743 mtype = mimetypes.guess_type("ftp:" + url)[0]
1744 headers = ""
1745 if mtype:
1746 headers += "Content-Type: %s\n" % mtype
1747 if retrlen is not None and retrlen >= 0:
1748 headers += "Content-Length: %d\n" % retrlen
1749 headers = email.message_from_string(headers)
Georg Brandl13e89462008-07-01 19:56:00 +00001750 return addinfourl(fp, headers, "ftp:" + url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001751 except ftperrors() as msg:
1752 raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
1753
1754 def open_data(self, url, data=None):
1755 """Use "data" URL."""
1756 if not isinstance(url, str):
1757 raise URLError('data error', 'proxy support for data protocol currently not implemented')
1758 # ignore POSTed data
1759 #
1760 # syntax of data URLs:
1761 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
1762 # mediatype := [ type "/" subtype ] *( ";" parameter )
1763 # data := *urlchar
1764 # parameter := attribute "=" value
1765 try:
1766 [type, data] = url.split(',', 1)
1767 except ValueError:
1768 raise IOError('data error', 'bad data URL')
1769 if not type:
1770 type = 'text/plain;charset=US-ASCII'
1771 semi = type.rfind(';')
1772 if semi >= 0 and '=' not in type[semi:]:
1773 encoding = type[semi+1:]
1774 type = type[:semi]
1775 else:
1776 encoding = ''
1777 msg = []
1778 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
1779 time.gmtime(time.time())))
1780 msg.append('Content-type: %s' % type)
1781 if encoding == 'base64':
1782 import base64
1783 data = base64.decodestring(data)
1784 else:
Georg Brandl13e89462008-07-01 19:56:00 +00001785 data = unquote(data)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001786 msg.append('Content-Length: %d' % len(data))
1787 msg.append('')
1788 msg.append(data)
1789 msg = '\n'.join(msg)
Georg Brandl13e89462008-07-01 19:56:00 +00001790 headers = email.message_from_string(msg)
1791 f = io.StringIO(msg)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001792 #f.fileno = None # needed for addinfourl
Georg Brandl13e89462008-07-01 19:56:00 +00001793 return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001794
1795
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Initialize the opener plus the state used by the error handlers."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}  # maps "realm@host" -> (user, passwd)
        self.tries = 0        # redirects followed so far for this request
        self.maxtries = 10    # redirect limit before reporting recursion

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a synthetic 500
            # instead of looping forever.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow a redirect to the URL named by the Location/URI header.

        Returns None when the response carries no redirect target.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the old response before issuing the new request.
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the URLopener.http_error_default calls below appear
        # to be expected to raise (there is no return after them) -- confirm
        # against the base class.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_<type>_basic_auth for the current URL scheme.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): as in http_error_401, the base-class default calls
        # are expected to raise -- confirm against the base class.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_<type>_basic_auth for the URL scheme.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-open *url* after embedding credentials in the http proxy URL."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is nonzero iff credentials were already embedded in the proxy
        # URL; reusing it as clear_cache below forces a fresh prompt then.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Re-open *url* after embedding credentials in the https proxy URL."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-open *url* with user:password@ embedded in the http URL."""
        host, selector = splithost(url)
        # Drop any credentials already present in the URL; a nonzero i
        # also clears the cached entry via get_user_passwd.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-open *url* with user:password@ embedded in the https URL."""
        host, selector = splithost(url)
        # See retry_http_basic_auth for the meaning of i.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, password) for realm@host, consulting the cache.

        A truthy clear_cache discards the cached entry and re-prompts.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C at either prompt means "no credentials".
            print()
            return None, None
1983
1984
1985# Utility functions
1986
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and cached in the module-level
    ``_localhost`` variable.
    """
    global _localhost
    cached = _localhost
    if cached is None:
        cached = _localhost = socket.gethostbyname('localhost')
    return cached
1994
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The result of the first lookup is cached in ``_thishost``.
    """
    global _thishost
    cached = _thishost
    if cached is None:
        cached = _thishost = socket.gethostbyname(socket.gethostname())
    return cached
2002
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on first use and the answer is cached in
    ``_ftperrors``.
    """
    global _ftperrors
    errs = _ftperrors
    if errs is None:
        import ftplib
        errs = _ftperrors = ftplib.all_errors
    return errs
2011
_noheaders = None
def noheaders():
    """Return an empty email Message object (a shared, cached instance)."""
    global _noheaders
    msg = _noheaders
    if msg is None:
        msg = _noheaders = email.message_from_string("")
    return msg
2019
2020
2021# Utility classes
2022
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        # The connection parameters are kept so init() can be called again
        # to transparently reconnect after the server drops the session.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """Connect, log in and change into the target directory chain."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        # NOTE(review): a None timeout is handed straight to connect();
        # presumably that means "block indefinitely" -- confirm against
        # the ftplib version in use.
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start a retrieval and return (file-like object, length or None).

        *type* 'd'/'D' requests a directory listing; any other value is
        used as the FTP transfer type (e.g. 'I' or 'A').  If a RETR of
        *file* fails with a 550 reply, fall back to a LIST of it.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection probably timed out/dropped; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file"; fall through to a listing.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error', reason) from reason
                finally:
                    # Always restore the working directory for the cached
                    # connection.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish the pending transfer, if any, consuming the final reply."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Finish any pending transfer and close the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2099
2100# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): when running as a CGI script,
    # HTTP_PROXY holds the client's "Proxy:" request header and is
    # therefore attacker-controlled -- drop the http proxy then.  An
    # exactly-lowercase http_proxy variable is still trusted and is
    # re-applied below, since CGI header variables are always upper-case.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            name = name.lower()
            if value:
                proxies[name[:-6]] = value
            else:
                proxies.pop(name[:-6], None)
    return proxies
2116
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    Matching is case-insensitive (DNS names are), and whitespace around
    list entries is ignored.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # DNS names are case-insensitive: compare everything in lowercase.
    hostonly = hostonly.lower()
    host = host.lower()
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lower()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
2135
2136
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        # NOTE(review): 'ic' is the classic-Mac-OS Internet Config module;
        # it is not available on Python 3, so this presumably always falls
        # back to the empty dict there -- confirm the targeted version.
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return proxies

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Environment variables, when present, take precedence; with no
        proxy environment configured nothing is bypassed.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0

    def getproxies():
        """Return proxy settings from the environment or Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()
2176
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        # NOTE(review): Python 3 renamed _winreg to winreg (PEP 3108); if
        # this file targets Python 3 the import below always fails and
        # registry-configured proxies are silently ignored -- confirm.
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test against the registry's ProxyOverride patterns for *host*.

        Returns 1 when *host* (or its resolved address/FQDN) matches one
        of the glob patterns, 0 otherwise.
        """
        # See the note in getproxies_registry about the _winreg name.
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Translate the glob pattern into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the no_proxy environment setting when any proxy environment
        variables are present, otherwise the registry's ProxyOverride.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
2302
else:
    # Neither macOS nor Windows: there is no system proxy store, so the
    # environment-variable based implementations defined above serve as
    # the platform defaults.
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment