# Issues in merging urllib and urllib2:
# 1. They both define a function named urlopen()

"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.

urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- Function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.

install_opener -- Installs a new opener as the default opener.

objects of interest:
OpenerDirector --

Request -- An object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib.request

# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
authinfo.add_password(realm='PDQ Application',
                      uri='https://mahler:8092/site-updates.py',
                      user='klem',
                      passwd='geheim$parole')

proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)

# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.python.org/')
"""
70
# XXX issues:
# If an authentication error handler that tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows that the problem was, e.g., that it didn't know
# the hash algorithm that was requested in the challenge, it would be
# good to pass that information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
84
import base64
import bisect
import email
import hashlib
import http.client
import io
import os
import posixpath
import random
import re
import socket
import sys
import time

from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
    urlparse, urlsplit, urljoin, unwrap, quote, unquote,
    splittype, splithost, splitport, splituser, splitpasswd,
    splitattr, splitquery, splitvalue, to_bytes)
from urllib.response import addinfourl, addclosehook
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000105
106# check for SSL
107try:
108 import ssl
109except:
110 _have_ssl = False
111else:
112 _have_ssl = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113
# used in User-Agent header sent
__version__ = sys.version[:3]

# Module-global OpenerDirector shared by all urlopen() calls; created
# lazily on first use and replaceable via install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Open *url* (a URL string or a Request) and return a file-like response.

    Delegates to the shared module-global OpenerDirector, building it with
    the default handler set on first call; *data* and *timeout* are passed
    straight through to OpenerDirector.open().
    """
    global _opener
    if _opener is None:
        # build the shared default opener on first use
        _opener = build_opener()
    return _opener.open(url, data, timeout)
123
def install_opener(opener):
    """Make *opener* the OpenerDirector used by subsequent urlopen() calls."""
    global _opener
    _opener = opener
127
# TODO(jhylton): Make this work with the same global opener.
_urlopener = None
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* into a local file via a lazily-created FancyURLopener.

    Note this uses a separate module-global opener from the one urlopen()
    uses (see the TODO above).  Arguments are passed straight through to
    FancyURLopener.retrieve().
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
135
def urlcleanup():
    """Clean up after urlretrieve() and discard the global default opener."""
    if _urlopener:
        # remove temporary files created by FancyURLopener.retrieve()
        _urlopener.cleanup()
    global _opener
    if _opener:
        _opener = None
142
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    full_url = request.get_full_url()
    netloc = urlparse(full_url)[1]
    if not netloc:
        # relative URL: fall back to the Host header, if any
        netloc = request.get_header("Host", "")
    # strip a trailing :port, then normalize case
    return _cut_port_re.sub("", netloc, 1).lower()
160
class Request:
    """Encapsulate one URL request: URL, optional POST data, headers.

    The type/host parsing results are computed lazily by get_type() and
    get_host(); the name-mangled __r_* attributes hold the remainder
    strings left over by the split* helpers, and __getattr__ computes
    them on demand if they are read before the corresponding getter ran.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # route through add_header() so keys are capitalized consistently
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order. this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # run the getter for its side effect of setting __r_<name>
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        """Return the HTTP method: POST when data is present, else GET."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        """Return the URL scheme, parsing it lazily; raises ValueError if absent."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the (unquoted) host part, parsing it lazily."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        # the path (plus query) sent in the request line
        return self.__r_host

    def set_proxy(self, host, type):
        # redirect the request at a proxy: the proxy becomes the host,
        # and the full original URL becomes the selector
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        # regular headers take precedence over unredirected ones
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
261
class OpenerDirector:
    """Manage a chain of handlers and route requests/responses through them.

    Handlers advertise their capabilities through specially named methods:
    <protocol>_open, <protocol>_request, <protocol>_response and
    <protocol>_error_<code>.  add_handler() parses those names and files
    each handler into the matching lookup table; open() and error() then
    walk the relevant chains in handler_order.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}        # protocol -> handlers with <proto>_open
        self.handle_error = {}       # protocol -> {code -> handlers}
        self.process_response = {}   # protocol -> response post-processors
        self.process_request = {}    # protocol -> request pre-processors

    def add_handler(self, handler):
        """Register *handler*, indexing it by the special methods it defines."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # split the method name as "<protocol>_<condition>"
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>"; kind is usually an HTTP status
                # code, stored as an int when it parses as one
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep each chain sorted by handler_order (BaseHandler.__lt__)
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in an specific order, the order
            # is specified in a Handler attribute
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (URL string or Request) and return the response.

        Runs the protocol's request pre-processors, dispatches through
        _open(), then runs the response post-processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # try default_open first, then the protocol-specific chain,
        # and finally unknown_open as a catch-all
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error_* handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!  (args[2] is the HTTP status code)
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # fall back to the catch-all http_error_default handlers
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
401
# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both
405
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        # classes proper, plus anything class-like exposing __bases__
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Drop every default class that a caller-supplied handler overrides,
    # whether the caller passed a subclass or an instance of it.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            overrides = (issubclass(check, klass) if isclass(check)
                         else isinstance(check, klass))
            if overrides:
                skip.add(klass)
    default_classes = [klass for klass in default_classes if klass not in skip]

    for klass in default_classes:
        opener.add_handler(klass())

    # Caller-supplied handlers go in last; bare classes are instantiated.
    for handler in handlers:
        if isclass(handler):
            handler = handler()
        opener.add_handler(handler)
    return opener
443
class BaseHandler:
    """Common machinery shared by all handlers managed by OpenerDirector."""

    # Position within each handler chain; lower values run earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order (used by bisect.insort)."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
461
462
class HTTPErrorProcessor(BaseHandler):
    """Route non-2xx HTTP responses through the parent's error chain."""

    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # RFC 2616: a "2xx" code means the request was successfully
        # received, understood, and accepted -- pass those through as-is.
        if 200 <= code < 300:
            return response
        # Anything else is handed to the error chain, which may return a
        # substitute response or raise HTTPError.
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
479
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000483
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # drop entity headers; the redirected request carries no body
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return
        # resolve a possibly relative Location against the current URL
        newurl = urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            # first redirect: start a fresh visit-count dict, shared by
            # the old and new request objects
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    # all four redirect codes share the same implementation
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
568
569
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    # NOTE(review): relies on the legacy urllib.parse split* helpers
    # (splittype/splituser/splitpasswd), matching the rest of this module.
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport
641
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies in a {scheme: proxy_url} map."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Grow a <scheme>_open method on this instance for every configured
        # scheme; the lambda's default arguments bind the per-scheme values
        # at definition time (avoiding the late-binding closure pitfall).
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, adding Proxy-authorization if configured."""
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # proxy given as a bare authority: assume the request's scheme
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)
679
class HTTPPasswordMgr:
    """Store credentials keyed by realm and reduced URI for later lookup."""

    def __init__(self):
        # {realm: {(reduced_uri, ...): (user, password)}}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for *realm* at *uri* (a URI or a sequence)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if not realm in self.passwd:
            self.passwd[realm] = {}
        # store under both the default-port and as-given reductions so a
        # lookup matches whether or not the caller spelled out the port
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) matching realm/authuri, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # normalize to an explicit port for the schemes we know
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # same authority: test is a suburi if base's path is a prefix
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False
742
743
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a wildcard (None) realm."""

    def find_user_password(self, realm, authuri):
        # Try an exact realm match first; if that yields nothing, retry
        # with the default realm (None), which acts as a catch-all.
        creds = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        if creds[0] is not None:
            return creds
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
752
753
class AbstractBasicAuthHandler:
    """Shared implementation of HTTP Basic authentication.

    Subclasses supply the auth_header attribute and the http_error_40x
    entry point that calls http_error_auth_reqed().
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry the request with Basic credentials if the challenge asks."""
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                # this exact header already failed once -- don't loop
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
797
798
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against the origin server (HTTP 401)."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        url = req.get_full_url()
        return self.http_error_auth_reqed('www-authenticate',
                                          url, req, headers)
807
808
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        authority = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          authority, req, headers)
821
822
def randombytes(n):
    """Return *n* bytes of OS-level randomness (os.urandom)."""
    return os.urandom(n)
826
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication (RFC 2617).

    Subclasses supply the auth_header attribute and the http_error_40x
    entry point that calls http_error_auth_reqed().
    """

    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0      # consecutive failed attempts on this handler
        self.nonce_count = 0  # "nc" value, incremented per qop="auth" request

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """React to a 401/407: retry with Digest credentials if offered."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with an Authorization header built from *auth*."""
        token, challenge = auth.split(' ', 1)
        # NOTE(review): parse_keqv_list and parse_http_list are neither
        # defined nor imported anywhere in this module, so this line raises
        # NameError at runtime; the helpers from the urllib2 port need to
        # be added to the module.  TODO confirm and fix at module level.
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # the exact same header already failed once; give up
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization value for *req*, or return None."""
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            # unsupported algorithm -- we cannot answer this challenge
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest primitives for *algorithm*.

        Returns (None, None) for unsupported algorithms so that
        get_authorization's existing ``H is None`` check can bail out
        cleanly.  (Previously an unknown algorithm left H unbound and
        raised UnboundLocalError instead.)
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
958
959
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Header written on the outgoing retry request.
    auth_header = 'Authorization'
    handler_order = 490 # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # [1] of the urlparse 6-tuple is the netloc of the failing URL.
        host = urlparse(req.get_full_url())[1]
        retry = self.http_error_auth_reqed('www-authenticate',
                                           host, req, headers)
        # Clear the retry counter so a later 401 gets a fresh attempt.
        self.reset_retry_count()
        return retry
976
977
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    # Digest authentication against an HTTP proxy (407 responses);
    # mirrors HTTPDigestAuthHandler but uses the proxy header names.

    auth_header = 'Proxy-Authorization'
    handler_order = 490 # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxy auth the relevant host is the request host itself.
        host = req.get_host()
        retry = self.http_error_auth_reqed('proxy-authenticate',
                                           host, req, headers)
        self.reset_retry_count()
        return retry
989
class AbstractHTTPHandler(BaseHandler):
    # Shared machinery for HTTPHandler/HTTPSHandler: request
    # normalization (do_request_) and the network round trip (do_open).

    def __init__(self, debuglevel=0):
        # Debug level intended for the underlying http.client
        # connection; 0 means quiet.
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in default headers on request and return it.

        Adds Content-type/Content-length for POST data, a Host header,
        and any parent-level addheaders not already present.  Raises
        URLError when the request has no host.
        """
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        # Prefer the host embedded in the selector (absolute URL form,
        # as used when going through a proxy) over the request host.
        scheme, sel = splittype(request.get_selector())
        sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a email Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host, timeout=req.timeout) # will parse host:port
        # Unredirected headers take precedence over ordinary ones.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header capitalization (e.g. "content-type" ->
        # "Content-Type") so duplicates cannot sneak through.
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise URLError(err)

        resp = addinfourl(r.fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
1064
1065
class HTTPHandler(AbstractHTTPHandler):
    # Concrete handler for the "http" scheme.

    def http_open(self, req):
        return self.do_open(http.client.HTTPConnection, req)

    # Request preprocessing is the shared normalization step.
    http_request = AbstractHTTPHandler.do_request_
1072
# HTTPSConnection only exists when Python was built with SSL support,
# so the https handler is defined conditionally.
if hasattr(http.client, 'HTTPSConnection'):
    class HTTPSHandler(AbstractHTTPHandler):

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_
1080
class HTTPCookieProcessor(BaseHandler):
    # Adds cookies from a CookieJar to outgoing requests and stores
    # cookies from responses back into it.  HTTPS shares the same hooks.
    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1098
class UnknownHandler(BaseHandler):
    # Last-resort handler: any scheme no other handler claimed is an error.
    def unknown_open(self, req):
        type = req.get_type()
        raise URLError('unknown url type: %s' % type)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001103
def parse_keqv_list(l):
    """Parse a list of key=value strings into a dict.

    Keys are assumed not to repeat.  A value wrapped in double quotes
    has the surrounding quotes stripped.
    """
    result = {}
    for item in l:
        key, value = item.split('=', 1)
        if value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        result[key] = value
    return result
1113
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits a comma-separated list whose elements may include
    quoted-strings; a quoted-string may contain a comma, and a
    non-quoted string may contain quote characters.  Backslash escapes
    are honored only inside double quotes; single quotes have no
    special meaning.  Returns the elements with surrounding whitespace
    stripped.
    """
    pieces = []
    current = []
    in_quotes = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # Previous char was a backslash inside quotes: take this
            # char literally (the backslash itself is dropped).
            current.append(ch)
            pending_escape = False
        elif in_quotes:
            if ch == '\\':
                pending_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # Unquoted comma terminates the current element.
            pieces.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # Flush the trailing element, if any.
    tail = ''.join(current)
    if tail:
        pieces.append(tail)

    return [piece.strip() for piece in pieces]
1156
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        # file://host/... with a non-empty, non-slash host component is
        # re-dispatched as an FTP request to that host.
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Lazily compute (and cache on the class) the set of IP
        # addresses that count as "this machine".
        if FileHandler.names is None:
            try:
                FileHandler.names = (socket.gethostbyname('localhost'),
                                     socket.gethostbyname(socket.gethostname()))
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        import email.utils
        import mimetypes
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(file)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Serve the file only when the URL names no host, or names
            # this machine (no explicit port allowed).
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                return addinfourl(open(localfile, 'rb'), headers, 'file:'+file)
        except OSError as msg:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001202
1203def _safe_gethostbyname(host):
1204 try:
1205 return socket.gethostbyname(host)
1206 except socket.gaierror:
1207 return None
1208
class FTPHandler(BaseHandler):
    # Handler for the "ftp" scheme: logs in (optionally with
    # user:password from the URL), retrieves a file or directory
    # listing, and wraps the result in an addinfourl.
    def ftp_open(self, req):
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        # Path components are unquoted individually; the last component
        # is the file name ('' means a directory listing).
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Transfer type: 'I'mage (binary) for files, 'D' for listings;
            # may be overridden by a ";type=x" URL attribute below.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            exc = URLError('ftp error: %s' % msg)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # Factory hook; CacheFTPHandler overrides this to reuse
        # connections.
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
        return fw
1266
class CacheFTPHandler(FTPHandler):
    """FTP handler that caches ftpwrapper connections for reuse.

    Connections are keyed by (user, host, port, path, timeout), expire
    self.delay seconds after last use, and at most self.max_conns are
    kept alive at once.

    XXX would be nice to have pluggable cache strategies
    XXX this stuff is definitely not thread safe
    """

    def __init__(self):
        self.cache = {}      # key -> live ftpwrapper connection
        self.timeout = {}    # key -> absolute expiry time (epoch seconds)
        self.soonest = 0     # earliest expiry among cached entries
        self.delay = 60      # idle lifetime of a connection, in seconds
        self.max_conns = 16  # hard cap on cached connections

    def setTimeout(self, t):
        """Set the idle lifetime (in seconds) for cached connections."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the maximum number of cached connections."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for the key, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            # Reuse: just push the expiry time out.
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections and enforce the max_conns limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # min() of an empty sequence raises ValueError when every
            # entry just expired, so guard explicitly.
            self.soonest = min(self.timeout.values()) if self.timeout else 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close before dropping so the connection is not
                    # leaked (the expiry path above already does this).
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values()) if self.timeout else 0
1313
# Code moved from the old urllib module

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

# Helper for non-unix systems: pick a platform-specific implementation
# of url2pathname/pathname2url ('mac' here is pre-OS-X MacOS).
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


ftpcache = {}  # module-level FTP connection cache shared by URLopener instances
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to an open_<scheme>() method by name.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # A local file can be returned directly, without copying.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                del fp
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            # No filename given: download into a temp file whose suffix
            # is derived from the URL path.
            import tempfile
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        result = filename, headers
        if self.tempcache is not None:
            self.tempcache[url] = result
        bs = 1024*8
        size = -1
        read = 0
        blocknum = 0
        if reporthook:
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Tuple form: open() signalled a proxied request as
            # (proxyhost, full_url).
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response.fp, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        void = fp.read()
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, email.utils
        from io import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # Only serve when the host names this machine and no port is given.
        if (not port
            and socket.gethostbyname(host) in (localhost(), thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        from io import StringIO
        host, path = splithost(url)
        if not host: raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001777
1778
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        # auth_cache maps "realm@host" -> (user, passwd); tries/maxtries
        # bound the number of consecutive redirects that are followed.
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a synthetic 500
            # instead of looping forever.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header, or
        return None when neither header is present.

        Note that the redirected request is reissued without *data*,
        i.e. a redirected POST is retried as a plain GET.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the old response before opening the new URL.
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            # 307 must not silently replay the POST body as a GET;
            # hand the response to the default error handler instead.
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the un-returned URLopener.http_error_default
        # calls below appear to rely on the base implementation raising;
        # if it returns instead, control falls through -- confirm.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch on the scheme of the current request:
        # retry_http_basic_auth or retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth or the https variant.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Ask for proxy credentials, embed them in the stored http
        proxy URL and retry; return None if the user supplied none."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already embedded in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Like retry_proxy_http_basic_auth, for the https proxy."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Ask for credentials for (host, realm), embed them in the URL
        as user:pass@host and retry; return None if none were given."""
        host, selector = splithost(url)
        # Drop credentials already present in the URL before asking.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Like retry_http_basic_auth, for https URLs."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for host/realm, consulting the cache.

        A truthy clear_cache forces a fresh prompt (used after a
        previous, evidently rejected, cached answer).
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                     (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C during the prompt means "no credentials".
            print()
            return None, None
1966
1967
1968# Utility functions
1969
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The first lookup's result is cached in the module-level
    _localhost and returned on every later call.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
1977
_thishost = None
def thishost():
    """Return the IP address of the current host.

    Resolved once via gethostbyname(gethostname()) and cached in the
    module-level _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
1985
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    The ftplib import is deferred and the tuple cached, so callers
    that never touch FTP never pay for the import.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
1994
_noheaders = None
def noheaders():
    """Return an empty email Message object (cached singleton)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2002
2003
2004# Utility classes
2005
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        # dirs is the sequence of path components to cwd through after
        # logging in; timeout is passed straight to FTP.connect().
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in, and change into self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving *file* ('d'/'D' type means directory listing).

        Returns a (file-like object, length-or-None) pair; reading the
        object streams the transfer and closing it ends the transfer.
        Raises URLError on FTP protocol errors.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection may have gone stale; reconnect and
            # retry the command once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file": fall through to the
                # directory-listing branch below; anything else is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error', reason) from reason
                finally:
                    # Always return to the original directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish the pending transfer, ignoring FTP protocol errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2082
2083# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    # 'no_proxy' is deliberately excluded: it is a bypass list consumed
    # by proxy_bypass_environment, not a proxy URL.
    return {
        key[:-6]: value
        for key, value in ((name.lower(), value)
                           for name, value in os.environ.items())
        if value and key.endswith('_proxy') and key != 'no_proxy'
    }
2102
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    suffixes = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if suffixes == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # bypass when the bare host or host:port ends with any listed suffix
    matched = any(
        entry and (hostonly.endswith(entry) or host.endswith(entry))
        for entry in suffixes.split(',')
    )
    return 1 if matched else 0
2121
2122
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            # Internet Config bindings unavailable: no proxy info.
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return proxies

    def proxy_bypass(host):
        # Honour environment no_proxy settings when any proxy
        # variables are set; otherwise never bypass.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0

    def getproxies():
        # Environment variables take precedence over Internet Config.
        return getproxies_environment() or getproxies_internetconfig()
2162
2163elif os.name == 'nt':
2164 def getproxies_registry():
2165 """Return a dictionary of scheme -> proxy server URL mappings.
2166
2167 Win32 uses the registry to store proxies.
2168
2169 """
2170 proxies = {}
2171 try:
2172 import _winreg
2173 except ImportError:
2174 # Std module, so should be around - but you never know!
2175 return proxies
2176 try:
2177 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
2178 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2179 proxyEnable = _winreg.QueryValueEx(internetSettings,
2180 'ProxyEnable')[0]
2181 if proxyEnable:
2182 # Returned as Unicode but problems if not converted to ASCII
2183 proxyServer = str(_winreg.QueryValueEx(internetSettings,
2184 'ProxyServer')[0])
2185 if '=' in proxyServer:
2186 # Per-protocol settings
2187 for p in proxyServer.split(';'):
2188 protocol, address = p.split('=', 1)
2189 # See if address has a type:// prefix
2190 import re
2191 if not re.match('^([^/:]+)://', address):
2192 address = '%s://%s' % (protocol, address)
2193 proxies[protocol] = address
2194 else:
2195 # Use one setting for all protocols
2196 if proxyServer[:5] == 'http:':
2197 proxies['http'] = proxyServer
2198 else:
2199 proxies['http'] = 'http://%s' % proxyServer
2200 proxies['ftp'] = 'ftp://%s' % proxyServer
2201 internetSettings.Close()
2202 except (WindowsError, ValueError, TypeError):
2203 # Either registry key not found etc, or the value in an
2204 # unexpected format.
2205 # proxies already set up to be empty so nothing to do
2206 pass
2207 return proxies
2208
2209 def getproxies():
2210 """Return a dictionary of scheme -> proxy server URL mappings.
2211
2212 Returns settings gathered from the environment, if specified,
2213 or the registry.
2214
2215 """
2216 return getproxies_environment() or getproxies_registry()
2217
2218 def proxy_bypass_registry(host):
2219 try:
2220 import _winreg
2221 import re
2222 except ImportError:
2223 # Std modules, so should be around - but you never know!
2224 return 0
2225 try:
2226 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
2227 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2228 proxyEnable = _winreg.QueryValueEx(internetSettings,
2229 'ProxyEnable')[0]
2230 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
2231 'ProxyOverride')[0])
2232 # ^^^^ Returned as Unicode but problems if not converted to ASCII
2233 except WindowsError:
2234 return 0
2235 if not proxyEnable or not proxyOverride:
2236 return 0
2237 # try to make a host list from name and IP address.
Georg Brandl13e89462008-07-01 19:56:00 +00002238 rawHost, port = splitport(host)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002239 host = [rawHost]
2240 try:
2241 addr = socket.gethostbyname(rawHost)
2242 if addr != rawHost:
2243 host.append(addr)
2244 except socket.error:
2245 pass
2246 try:
2247 fqdn = socket.getfqdn(rawHost)
2248 if fqdn != rawHost:
2249 host.append(fqdn)
2250 except socket.error:
2251 pass
2252 # make a check value list from the registry entry: replace the
2253 # '<local>' string by the localhost entry and the corresponding
2254 # canonical entry.
2255 proxyOverride = proxyOverride.split(';')
2256 i = 0
2257 while i < len(proxyOverride):
2258 if proxyOverride[i] == '<local>':
2259 proxyOverride[i:i+1] = ['localhost',
2260 '127.0.0.1',
2261 socket.gethostname(),
2262 socket.gethostbyname(
2263 socket.gethostname())]
2264 i += 1
2265 # print proxyOverride
2266 # now check if we match one of the registry values.
2267 for test in proxyOverride:
2268 test = test.replace(".", r"\.") # mask dots
2269 test = test.replace("*", r".*") # change glob sequence
2270 test = test.replace("?", r".") # change glob char
2271 for val in host:
2272 # print "%s <--> %s" %( test, val )
2273 if re.match(test, val, re.I):
2274 return 1
2275 return 0
2276
2277 def proxy_bypass(host):
2278 """Return a dictionary of scheme -> proxy server URL mappings.
2279
2280 Returns settings gathered from the environment, if specified,
2281 or the registry.
2282
2283 """
2284 if getproxies_environment():
2285 return proxy_bypass_environment(host)
2286 else:
2287 return proxy_bypass_registry(host)
2288
else:
    # By default use environment variables: getproxies_environment()
    # reads <scheme>_proxy and proxy_bypass_environment() reads no_proxy.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment