Blame - Lib/urllib/request.py - platform/external/python/cpython2

blob: 428be4acef67350f233c770c66b4897217914f12 [file] [log] [blame]

Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1	# Issues in merging urllib and urllib2:
				2	# 1. They both define a function named urlopen()
				3
				4	"""An extensible library for opening URLs using a variety of protocols
				5
				6	The simplest way to use this module is to call the urlopen function,
				7	which accepts a string containing a URL or a Request object (described
				8	below). It opens the URL and returns the results as a file-like
				9	object; the returned object has some extra methods described below.
				10
				11	The OpenerDirector manages a collection of Handler objects that do
				12	all the actual work. Each Handler implements a particular protocol or
				13	option. The OpenerDirector is a composite object that invokes the
				14	Handlers needed to open the requested URL. For example, the
				15	HTTPHandler performs HTTP GET and POST requests and deals with
				16	non-error returns. The HTTPRedirectHandler automatically deals with
				17	HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
				18	deals with digest authentication.
				19
				20	urlopen(url, data=None) -- Basic usage is the same as original
				21	urllib. pass the url and optionally data to post to an HTTP URL, and
				22	get a file-like object back. One difference is that you can also pass
				23	a Request instance instead of URL. Raises a URLError (subclass of
				24	IOError); for HTTP errors, raises an HTTPError, which can also be
				25	treated as a valid response.
				26
				27	build_opener -- Function that creates a new OpenerDirector instance.
				28	Will install the default handlers. Accepts one or more Handlers as
				29	arguments, either instances or Handler classes that it will
				30	instantiate. If one of the arguments is a subclass of the default
				31	handler, the argument will be installed instead of the default.
				32
				33	install_opener -- Installs a new opener as the default opener.
				34
				35	objects of interest:
				36	OpenerDirector --
				37
				38	Request -- An object that encapsulates the state of a request. The
				39	state can be as simple as the URL. It can also include extra HTTP
				40	headers, e.g. a User-Agent.
				41
				42	BaseHandler --
				43
				44	internals:
				45	BaseHandler and parent
				46	_call_chain conventions
				47
				48	Example usage:
				49
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	50	import urllib.request
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	51
				52	# set up authentication info
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	53	authinfo = urllib.request.HTTPBasicAuthHandler()
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	54	authinfo.add_password(realm='PDQ Application',
				55	uri='https://mahler:8092/site-updates.py',
				56	user='klem',
				57	passwd='geheim$parole')
				58
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	59	proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	60
				61	# build a new opener that adds authentication and caching FTP handlers
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	62	opener = urllib.request.build_opener(proxy_support, authinfo,
				63	urllib.request.CacheFTPHandler)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	64
				65	# install it
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	66	urllib.request.install_opener(opener)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	67
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	68	f = urllib.request.urlopen('http://www.python.org/')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	69	"""
				70
				71	# XXX issues:
				72	# If an authentication error handler tries to perform
				73	# authentication but fails for some reason, how should the error be
				74	# signalled? The client needs to know the HTTP error code. But if
				75	# the handler knows that the problem was, e.g., that it didn't know
				76	# the hash algorithm requested in the challenge, it would be good to
				77	# pass that information along to the client, too.
				78	# ftp errors aren't handled cleanly
				79	# check digest against correct (i.e. non-apache) implementation
				80
				81	# Possible extensions:
				82	# complex proxies XXX not sure what exactly was meant by this
				83	# abstract factory for opener
				84
				85	import base64
				86	import email
				87	import hashlib
				88	import http.client
				89	import io
				90	import os
				91	import posixpath
				92	import random
				93	import re
				94	import socket
				95	import sys
				96	import time
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	97	import bisect
				98
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	99	from urllib.error import URLError, HTTPError, ContentTooShortError
				100	from urllib.parse import (
				101	urlparse, urlsplit, urljoin, unwrap, quote, unquote,
				102	splittype, splithost, splitport, splituser, splitpasswd,
				103	splitattr, splitquery, splitvalue, to_bytes)
				104	from urllib.response import addinfourl, addclosehook
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	105
				106	# check for SSL
				107	try:
				108	import ssl
				109	except:
				110	_have_ssl = False
				111	else:
				112	_have_ssl = True
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	113
				114	# used in User-Agent header sent
				115	__version__ = sys.version[:3]
				116
				117	_opener = None
				118	def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
				119	global _opener
				120	if _opener is None:
				121	_opener = build_opener()
				122	return _opener.open(url, data, timeout)
				123
				124	def install_opener(opener):
				125	global _opener
				126	_opener = opener
				127
				128	# TODO(jhylton): Make this work with the same global opener.
				129	_urlopener = None
				130	def urlretrieve(url, filename=None, reporthook=None, data=None):
				131	global _urlopener
				132	if not _urlopener:
				133	_urlopener = FancyURLopener()
				134	return _urlopener.retrieve(url, filename, reporthook, data)
				135
				136	def urlcleanup():
				137	if _urlopener:
				138	_urlopener.cleanup()
				139	global _opener
				140	if _opener:
				141	_opener = None
				142
				143	# copied from cookielib.py
				144	_cut_port_re = re.compile(r":\d+$")
				145	def request_host(request):
				146	"""Return request-host, as defined by RFC 2965.
				147
				148	Variation from RFC: returned value is lowercased, for convenient
				149	comparison.
				150
				151	"""
				152	url = request.get_full_url()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	153	host = urlparse(url)[1]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	154	if host == "":
				155	host = request.get_header("Host", "")
				156
				157	# remove port, if present
				158	host = _cut_port_re.sub("", host, 1)
				159	return host.lower()
				160
				161	class Request:
				162
				163	def __init__(self, url, data=None, headers={},
				164	origin_req_host=None, unverifiable=False):
				165	# unwrap('<URL:type://host/path>') --> 'type://host/path'
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	166	self.__original = unwrap(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	167	self.type = None
				168	# self.__r_type is what's left after doing the splittype
				169	self.host = None
				170	self.port = None
				171	self.data = data
				172	self.headers = {}
				173	for key, value in headers.items():
				174	self.add_header(key, value)
				175	self.unredirected_hdrs = {}
				176	if origin_req_host is None:
				177	origin_req_host = request_host(self)
				178	self.origin_req_host = origin_req_host
				179	self.unverifiable = unverifiable
				180
				181	def __getattr__(self, attr):
				182	# XXX this is a fallback mechanism to guard against these
				183	# methods getting called in a non-standard order. this may be
				184	# too complicated and/or unnecessary.
				185	# XXX should the __r_XXX attributes be public?
				186	if attr[:12] == '_Request__r_':
				187	name = attr[12:]
				188	if hasattr(Request, 'get_' + name):
				189	getattr(self, 'get_' + name)()
				190	return getattr(self, attr)
				191	raise AttributeError(attr)
				192
				193	def get_method(self):
				194	if self.has_data():
				195	return "POST"
				196	else:
				197	return "GET"
				198
				199	# XXX these helper methods are lame
				200
				201	def add_data(self, data):
				202	self.data = data
				203
				204	def has_data(self):
				205	return self.data is not None
				206
				207	def get_data(self):
				208	return self.data
				209
				210	def get_full_url(self):
				211	return self.__original
				212
				213	def get_type(self):
				214	if self.type is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	215	self.type, self.__r_type = splittype(self.__original)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	216	if self.type is None:
				217	raise ValueError("unknown url type: %s" % self.__original)
				218	return self.type
				219
				220	def get_host(self):
				221	if self.host is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	222	self.host, self.__r_host = splithost(self.__r_type)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	223	if self.host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	224	self.host = unquote(self.host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	225	return self.host
				226
				227	def get_selector(self):
				228	return self.__r_host
				229
				230	def set_proxy(self, host, type):
				231	self.host, self.type = host, type
				232	self.__r_host = self.__original
				233
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame^]	234	def has_proxy(self):
				235	return self.__r_host == self.__original
				236
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	237	def get_origin_req_host(self):
				238	return self.origin_req_host
				239
				240	def is_unverifiable(self):
				241	return self.unverifiable
				242
				243	def add_header(self, key, val):
				244	# useful for something like authentication
				245	self.headers[key.capitalize()] = val
				246
				247	def add_unredirected_header(self, key, val):
				248	# will not be added to a redirected request
				249	self.unredirected_hdrs[key.capitalize()] = val
				250
				251	def has_header(self, header_name):
				252	return (header_name in self.headers or
				253	header_name in self.unredirected_hdrs)
				254
				255	def get_header(self, header_name, default=None):
				256	return self.headers.get(
				257	header_name,
				258	self.unredirected_hdrs.get(header_name, default))
				259
				260	def header_items(self):
				261	hdrs = self.unredirected_hdrs.copy()
				262	hdrs.update(self.headers)
				263	return list(hdrs.items())
				264
				265	class OpenerDirector:
				266	def __init__(self):
				267	client_version = "Python-urllib/%s" % __version__
				268	self.addheaders = [('User-agent', client_version)]
				269	# manage the individual handlers
				270	self.handlers = []
				271	self.handle_open = {}
				272	self.handle_error = {}
				273	self.process_response = {}
				274	self.process_request = {}
				275
				276	def add_handler(self, handler):
				277	if not hasattr(handler, "add_parent"):
				278	raise TypeError("expected BaseHandler instance, got %r" %
				279	type(handler))
				280
				281	added = False
				282	for meth in dir(handler):
				283	if meth in ["redirect_request", "do_open", "proxy_open"]:
				284	# oops, coincidental match
				285	continue
				286
				287	i = meth.find("_")
				288	protocol = meth[:i]
				289	condition = meth[i+1:]
				290
				291	if condition.startswith("error"):
				292	j = condition.find("_") + i + 1
				293	kind = meth[j+1:]
				294	try:
				295	kind = int(kind)
				296	except ValueError:
				297	pass
				298	lookup = self.handle_error.get(protocol, {})
				299	self.handle_error[protocol] = lookup
				300	elif condition == "open":
				301	kind = protocol
				302	lookup = self.handle_open
				303	elif condition == "response":
				304	kind = protocol
				305	lookup = self.process_response
				306	elif condition == "request":
				307	kind = protocol
				308	lookup = self.process_request
				309	else:
				310	continue
				311
				312	handlers = lookup.setdefault(kind, [])
				313	if handlers:
				314	bisect.insort(handlers, handler)
				315	else:
				316	handlers.append(handler)
				317	added = True
				318
				319	if added:
				320	# the handlers must work in an specific order, the order
				321	# is specified in a Handler attribute
				322	bisect.insort(self.handlers, handler)
				323	handler.add_parent(self)
				324
				325	def close(self):
				326	# Only exists for backwards compatibility.
				327	pass
				328
				329	def _call_chain(self, chain, kind, meth_name, *args):
				330	# Handlers raise an exception if no one else should try to handle
				331	# the request, or return None if they can't but another handler
				332	# could. Otherwise, they return the response.
				333	handlers = chain.get(kind, ())
				334	for handler in handlers:
				335	func = getattr(handler, meth_name)
				336
				337	result = func(*args)
				338	if result is not None:
				339	return result
				340
				341	def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
				342	# accept a URL or a Request object
				343	if isinstance(fullurl, str):
				344	req = Request(fullurl, data)
				345	else:
				346	req = fullurl
				347	if data is not None:
				348	req.add_data(data)
				349
				350	req.timeout = timeout
				351	protocol = req.get_type()
				352
				353	# pre-process request
				354	meth_name = protocol+"_request"
				355	for processor in self.process_request.get(protocol, []):
				356	meth = getattr(processor, meth_name)
				357	req = meth(req)
				358
				359	response = self._open(req, data)
				360
				361	# post-process response
				362	meth_name = protocol+"_response"
				363	for processor in self.process_response.get(protocol, []):
				364	meth = getattr(processor, meth_name)
				365	response = meth(req, response)
				366
				367	return response
				368
				369	def _open(self, req, data=None):
				370	result = self._call_chain(self.handle_open, 'default',
				371	'default_open', req)
				372	if result:
				373	return result
				374
				375	protocol = req.get_type()
				376	result = self._call_chain(self.handle_open, protocol, protocol +
				377	'_open', req)
				378	if result:
				379	return result
				380
				381	return self._call_chain(self.handle_open, 'unknown',
				382	'unknown_open', req)
				383
				384	def error(self, proto, *args):
				385	if proto in ('http', 'https'):
				386	# XXX http[s] protocols are special-cased
				387	dict = self.handle_error['http'] # https is not different than http
				388	proto = args[2] # YUCK!
				389	meth_name = 'http_error_%s' % proto
				390	http_err = 1
				391	orig_args = args
				392	else:
				393	dict = self.handle_error
				394	meth_name = proto + '_error'
				395	http_err = 0
				396	args = (dict, proto, meth_name) + args
				397	result = self._call_chain(*args)
				398	if result:
				399	return result
				400
				401	if http_err:
				402	args = (dict, 'default', 'http_error_default') + orig_args
				403	return self._call_chain(*args)
				404
				405	# XXX probably also want an abstract factory that knows when it makes
				406	# sense to skip a superclass in favor of a subclass and when it might
				407	# make sense to include both
				408
				409	def build_opener(*handlers):
				410	"""Create an opener object from a list of handlers.
				411
				412	The opener will use several default handlers, including support
				413	for HTTP and FTP.
				414
				415	If any of the handlers passed as arguments are subclasses of the
				416	default handlers, the default handlers will not be used.
				417	"""
				418	def isclass(obj):
				419	return isinstance(obj, type) or hasattr(obj, "__bases__")
				420
				421	opener = OpenerDirector()
				422	default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
				423	HTTPDefaultErrorHandler, HTTPRedirectHandler,
				424	FTPHandler, FileHandler, HTTPErrorProcessor]
				425	if hasattr(http.client, "HTTPSConnection"):
				426	default_classes.append(HTTPSHandler)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	427	skip = set()
				428	for klass in default_classes:
				429	for check in handlers:
				430	if isclass(check):
				431	if issubclass(check, klass):
				432	skip.add(klass)
				433	elif isinstance(check, klass):
				434	skip.add(klass)
				435	for klass in skip:
				436	default_classes.remove(klass)
				437
				438	for klass in default_classes:
				439	opener.add_handler(klass())
				440
				441	for h in handlers:
				442	if isclass(h):
				443	h = h()
				444	opener.add_handler(h)
				445	return opener
				446
				447	class BaseHandler:
				448	handler_order = 500
				449
				450	def add_parent(self, parent):
				451	self.parent = parent
				452
				453	def close(self):
				454	# Only exists for backwards compatibility
				455	pass
				456
				457	def __lt__(self, other):
				458	if not hasattr(other, "handler_order"):
				459	# Try to preserve the old behavior of having custom classes
				460	# inserted after default ones (works only for custom user
				461	# classes which are not aware of handler_order).
				462	return True
				463	return self.handler_order < other.handler_order
				464
				465
				466	class HTTPErrorProcessor(BaseHandler):
				467	"""Process HTTP error responses."""
				468	handler_order = 1000 # after all other processing
				469
				470	def http_response(self, request, response):
				471	code, msg, hdrs = response.code, response.msg, response.info()
				472
				473	# According to RFC 2616, "2xx" code indicates that the client's
				474	# request was successfully received, understood, and accepted.
				475	if not (200 <= code < 300):
				476	response = self.parent.error(
				477	'http', request, response, code, msg, hdrs)
				478
				479	return response
				480
				481	https_response = http_response
				482
				483	class HTTPDefaultErrorHandler(BaseHandler):
				484	def http_error_default(self, req, fp, code, msg, hdrs):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	485	raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	486
				487	class HTTPRedirectHandler(BaseHandler):
				488	# maximum number of redirections to any single URL
				489	# this is needed because of the state that cookies introduce
				490	max_repeats = 4
				491	# maximum total number of redirections (regardless of URL) before
				492	# assuming we're in a loop
				493	max_redirections = 10
				494
				495	def redirect_request(self, req, fp, code, msg, headers, newurl):
				496	"""Return a Request or None in response to a redirect.
				497
				498	This is called by the http_error_30x methods when a
				499	redirection response is received. If a redirection should
				500	take place, return a new Request to allow http_error_30x to
				501	perform the redirect. Otherwise, raise HTTPError if no-one
				502	else should try to handle this url. Return None if you can't
				503	but another Handler might.
				504	"""
				505	m = req.get_method()
				506	if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
				507	or code in (301, 302, 303) and m == "POST")):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	508	raise HTTPError(req.get_full_url(), code, msg, headers, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	509
				510	# Strictly (according to RFC 2616), 301 or 302 in response to
				511	# a POST MUST NOT cause a redirection without confirmation
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	512	# from the user (of urllib.request, in this case). In practice,
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	513	# essentially all clients do redirect in this case, so we do
				514	# the same.
				515	# be conciliant with URIs containing a space
				516	newurl = newurl.replace(' ', '%20')
				517	CONTENT_HEADERS = ("content-length", "content-type")
				518	newheaders = dict((k, v) for k, v in req.headers.items()
				519	if k.lower() not in CONTENT_HEADERS)
				520	return Request(newurl,
				521	headers=newheaders,
				522	origin_req_host=req.get_origin_req_host(),
				523	unverifiable=True)
				524
				525	# Implementation note: To avoid the server sending us into an
				526	# infinite loop, the request object needs to track what URLs we
				527	# have already seen. Do this by adding a handler-specific
				528	# attribute to the Request object.
				529	def http_error_302(self, req, fp, code, msg, headers):
				530	# Some servers (incorrectly) return multiple Location headers
				531	# (so probably same goes for URI). Use first header.
				532	if "location" in headers:
				533	newurl = headers["location"]
				534	elif "uri" in headers:
				535	newurl = headers["uri"]
				536	else:
				537	return
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	538	newurl = urljoin(req.get_full_url(), newurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	539
				540	# XXX Probably want to forget about the state of the current
				541	# request, although that might interact poorly with other
				542	# handlers that also use handler-specific request attributes
				543	new = self.redirect_request(req, fp, code, msg, headers, newurl)
				544	if new is None:
				545	return
				546
				547	# loop detection
				548	# .redirect_dict has a key url if url was previously visited.
				549	if hasattr(req, 'redirect_dict'):
				550	visited = new.redirect_dict = req.redirect_dict
				551	if (visited.get(newurl, 0) >= self.max_repeats or
				552	len(visited) >= self.max_redirections):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	553	raise HTTPError(req.get_full_url(), code,
				554	self.inf_msg + msg, headers, fp)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	555	else:
				556	visited = new.redirect_dict = req.redirect_dict = {}
				557	visited[newurl] = visited.get(newurl, 0) + 1
				558
				559	# Don't close the fp until we are sure that we won't use it
				560	# with HTTPError.
				561	fp.read()
				562	fp.close()
				563
				564	return self.parent.open(new)
				565
				566	http_error_301 = http_error_303 = http_error_307 = http_error_302
				567
				568	inf_msg = "The HTTP server returned a redirect error that would " \
				569	"lead to an infinite loop.\n" \
				570	"The last 30x error message was:\n"
				571
				572
				573	def _parse_proxy(proxy):
				574	"""Return (scheme, user, password, host/port) given a URL or an authority.
				575
				576	If a URL is supplied, it must have an authority (host:port) component.
				577	According to RFC 3986, having an authority component means the URL must
				578	have two slashes after the scheme:
				579
				580	>>> _parse_proxy('file:/ftp.example.com/')
				581	Traceback (most recent call last):
				582	ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
				583
				584	The first three items of the returned tuple may be None.
				585
				586	Examples of authority parsing:
				587
				588	>>> _parse_proxy('proxy.example.com')
				589	(None, None, None, 'proxy.example.com')
				590	>>> _parse_proxy('proxy.example.com:3128')
				591	(None, None, None, 'proxy.example.com:3128')
				592
				593	The authority component may optionally include userinfo (assumed to be
				594	username:password):
				595
				596	>>> _parse_proxy('joe:password@proxy.example.com')
				597	(None, 'joe', 'password', 'proxy.example.com')
				598	>>> _parse_proxy('joe:password@proxy.example.com:3128')
				599	(None, 'joe', 'password', 'proxy.example.com:3128')
				600
				601	Same examples, but with URLs instead:
				602
				603	>>> _parse_proxy('http://proxy.example.com/')
				604	('http', None, None, 'proxy.example.com')
				605	>>> _parse_proxy('http://proxy.example.com:3128/')
				606	('http', None, None, 'proxy.example.com:3128')
				607	>>> _parse_proxy('http://joe:password@proxy.example.com/')
				608	('http', 'joe', 'password', 'proxy.example.com')
				609	>>> _parse_proxy('http://joe:password@proxy.example.com:3128')
				610	('http', 'joe', 'password', 'proxy.example.com:3128')
				611
				612	Everything after the authority is ignored:
				613
				614	>>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
				615	('ftp', 'joe', 'password', 'proxy.example.com')
				616
				617	Test for no trailing '/' case:
				618
				619	>>> _parse_proxy('http://joe:password@proxy.example.com')
				620	('http', 'joe', 'password', 'proxy.example.com')
				621
				622	"""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	623	scheme, r_scheme = splittype(proxy)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	624	if not r_scheme.startswith("/"):
				625	# authority
				626	scheme = None
				627	authority = proxy
				628	else:
				629	# URL
				630	if not r_scheme.startswith("//"):
				631	raise ValueError("proxy URL with no authority: %r" % proxy)
				632	# We have an authority, so for RFC 3986-compliant URLs (by ss 3.
				633	# and 3.3.), path is empty or starts with '/'
				634	end = r_scheme.find("/", 2)
				635	if end == -1:
				636	end = None
				637	authority = r_scheme[2:end]
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	638	userinfo, hostport = splituser(authority)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	639	if userinfo is not None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	640	user, password = splitpasswd(userinfo)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	641	else:
				642	user = password = None
				643	return scheme, user, password, hostport
				644
				645	class ProxyHandler(BaseHandler):
				646	# Proxies must be in front
				647	handler_order = 100
				648
				649	def __init__(self, proxies=None):
				650	if proxies is None:
				651	proxies = getproxies()
				652	assert hasattr(proxies, 'keys'), "proxies must be a mapping"
				653	self.proxies = proxies
				654	for type, url in proxies.items():
				655	setattr(self, '%s_open' % type,
				656	lambda r, proxy=url, type=type, meth=self.proxy_open: \
				657	meth(r, proxy, type))
				658
				659	def proxy_open(self, req, proxy, type):
				660	orig_type = req.get_type()
				661	proxy_type, user, password, hostport = _parse_proxy(proxy)
				662	if proxy_type is None:
				663	proxy_type = orig_type
				664	if user and password:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	665	user_pass = '%s:%s' % (unquote(user),
				666	unquote(password))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	667	creds = base64.b64encode(user_pass.encode()).decode("ascii")
				668	req.add_header('Proxy-authorization', 'Basic ' + creds)
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	669	hostport = unquote(hostport)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	670	req.set_proxy(hostport, proxy_type)
				671	if orig_type == proxy_type:
				672	# let other handlers take care of it
				673	return None
				674	else:
				675	# need to start over, because the other handlers don't
				676	# grok the proxy's URL type
				677	# e.g. if we have a constructor arg proxies like so:
				678	# {'http': 'ftp://proxy.example.com'}, we may end up turning
				679	# a request for http://acme.example.com/a into one for
				680	# ftp://proxy.example.com/a
				681	return self.parent.open(req)
				682
				683	class HTTPPasswordMgr:
				684
				685	def __init__(self):
				686	self.passwd = {}
				687
				688	def add_password(self, realm, uri, user, passwd):
				689	# uri could be a single URI or a sequence
				690	if isinstance(uri, str):
				691	uri = [uri]
				692	if not realm in self.passwd:
				693	self.passwd[realm] = {}
				694	for default_port in True, False:
				695	reduced_uri = tuple(
				696	[self.reduce_uri(u, default_port) for u in uri])
				697	self.passwd[realm][reduced_uri] = (user, passwd)
				698
				699	def find_user_password(self, realm, authuri):
				700	domains = self.passwd.get(realm, {})
				701	for default_port in True, False:
				702	reduced_authuri = self.reduce_uri(authuri, default_port)
				703	for uris, authinfo in domains.items():
				704	for uri in uris:
				705	if self.is_suburi(uri, reduced_authuri):
				706	return authinfo
				707	return None, None
				708
				709	def reduce_uri(self, uri, default_port=True):
				710	"""Accept authority or URI and extract only the authority and path."""
				711	# note HTTP URLs do not have a userinfo component
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	712	parts = urlsplit(uri)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	713	if parts[1]:
				714	# URI
				715	scheme = parts[0]
				716	authority = parts[1]
				717	path = parts[2] or '/'
				718	else:
				719	# host or host:port
				720	scheme = None
				721	authority = uri
				722	path = '/'
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	723	host, port = splitport(authority)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	724	if default_port and port is None and scheme is not None:
				725	dport = {"http": 80,
				726	"https": 443,
				727	}.get(scheme)
				728	if dport is not None:
				729	authority = "%s:%d" % (host, dport)
				730	return authority, path
				731
				732	def is_suburi(self, base, test):
				733	"""Check if test is below base in a URI tree
				734
				735	Both args must be URIs in reduced form.
				736	"""
				737	if base == test:
				738	return True
				739	if base[0] != test[0]:
				740	return False
				741	common = posixpath.commonprefix((base[1], test[1]))
				742	if len(common) == len(base[1]):
				743	return True
				744	return False
				745
				746
				747	class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
				748
				749	def find_user_password(self, realm, authuri):
				750	user, password = HTTPPasswordMgr.find_user_password(self, realm,
				751	authuri)
				752	if user is not None:
				753	return user, password
				754	return HTTPPasswordMgr.find_user_password(self, None, authuri)
				755
				756
				757	class AbstractBasicAuthHandler:
				758
				759	# XXX this allows for multiple auth-schemes, but will stupidly pick
				760	# the last one with a realm specified.
				761
				762	# allow for double- and single-quoted realm values
				763	# (single quotes are a violation of the RFC, but appear in the wild)
				764	rx = re.compile('(?:.,)[ \t]*([^ \t]+)[ \t]+'
				765	'realm=(["\'])(.*?)\\2', re.I)
				766
				767	# XXX could pre-emptively send auth info already accepted (RFC 2617,
				768	# end of section 2, and section 1.2 immediately after "credentials"
				769	# production).
				770
				771	def __init__(self, password_mgr=None):
				772	if password_mgr is None:
				773	password_mgr = HTTPPasswordMgr()
				774	self.passwd = password_mgr
				775	self.add_password = self.passwd.add_password
				776
				777	def http_error_auth_reqed(self, authreq, host, req, headers):
				778	# host may be an authority (without userinfo) or a URL with an
				779	# authority
				780	# XXX could be multiple headers
				781	authreq = headers.get(authreq, None)
				782	if authreq:
				783	mo = AbstractBasicAuthHandler.rx.search(authreq)
				784	if mo:
				785	scheme, quote, realm = mo.groups()
				786	if scheme.lower() == 'basic':
				787	return self.retry_http_basic_auth(host, req, realm)
				788
				789	def retry_http_basic_auth(self, host, req, realm):
				790	user, pw = self.passwd.find_user_password(realm, host)
				791	if pw is not None:
				792	raw = "%s:%s" % (user, pw)
				793	auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
				794	if req.headers.get(self.auth_header, None) == auth:
				795	return None
				796	req.add_header(self.auth_header, auth)
				797	return self.parent.open(req)
				798	else:
				799	return None
				800
				801
				802	class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
				803
				804	auth_header = 'Authorization'
				805
				806	def http_error_401(self, req, fp, code, msg, headers):
				807	url = req.get_full_url()
				808	return self.http_error_auth_reqed('www-authenticate',
				809	url, req, headers)
				810
				811
				812	class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
				813
				814	auth_header = 'Proxy-authorization'
				815
				816	def http_error_407(self, req, fp, code, msg, headers):
				817	# http_error_auth_reqed requires that there is no userinfo component in
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	818	# authority. Assume there isn't one, since urllib.request does not (and
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	819	# should not, RFC 3986 s. 3.2.1) support requests for URLs containing
				820	# userinfo.
				821	authority = req.get_host()
				822	return self.http_error_auth_reqed('proxy-authenticate',
				823	authority, req, headers)
				824
				825
				826	def randombytes(n):
				827	"""Return n random bytes."""
				828	return os.urandom(n)
				829
				830	class AbstractDigestAuthHandler:
				831	# Digest authentication is specified in RFC 2617.
				832
				833	# XXX The client does not inspect the Authentication-Info header
				834	# in a successful response.
				835
				836	# XXX It should be possible to test this implementation against
				837	# a mock server that just generates a static set of challenges.
				838
				839	# XXX qop="auth-int" supports is shaky
				840
				841	def __init__(self, passwd=None):
				842	if passwd is None:
				843	passwd = HTTPPasswordMgr()
				844	self.passwd = passwd
				845	self.add_password = self.passwd.add_password
				846	self.retried = 0
				847	self.nonce_count = 0
				848
				849	def reset_retry_count(self):
				850	self.retried = 0
				851
				852	def http_error_auth_reqed(self, auth_header, host, req, headers):
				853	authreq = headers.get(auth_header, None)
				854	if self.retried > 5:
				855	# Don't fail endlessly - if we failed once, we'll probably
				856	# fail a second time. Hm. Unless the Password Manager is
				857	# prompting for the information. Crap. This isn't great
				858	# but it's better than the current 'repeat until recursion
				859	# depth exceeded' approach <wink>
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	860	raise HTTPError(req.get_full_url(), 401, "digest auth failed",
				861	headers, None)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	862	else:
				863	self.retried += 1
				864	if authreq:
				865	scheme = authreq.split()[0]
				866	if scheme.lower() == 'digest':
				867	return self.retry_http_digest_auth(req, authreq)
				868
				869	def retry_http_digest_auth(self, req, auth):
				870	token, challenge = auth.split(' ', 1)
				871	chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
				872	auth = self.get_authorization(req, chal)
				873	if auth:
				874	auth_val = 'Digest %s' % auth
				875	if req.headers.get(self.auth_header, None) == auth_val:
				876	return None
				877	req.add_unredirected_header(self.auth_header, auth_val)
				878	resp = self.parent.open(req)
				879	return resp
				880
				881	def get_cnonce(self, nonce):
				882	# The cnonce-value is an opaque
				883	# quoted string value provided by the client and used by both client
				884	# and server to avoid chosen plaintext attacks, to provide mutual
				885	# authentication, and to provide some message integrity protection.
				886	# This isn't a fabulous effort, but it's probably Good Enough.
				887	s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
				888	b = s.encode("ascii") + randombytes(8)
				889	dig = hashlib.sha1(b).hexdigest()
				890	return dig[:16]
				891
				892	def get_authorization(self, req, chal):
				893	try:
				894	realm = chal['realm']
				895	nonce = chal['nonce']
				896	qop = chal.get('qop')
				897	algorithm = chal.get('algorithm', 'MD5')
				898	# mod_digest doesn't send an opaque, even though it isn't
				899	# supposed to be optional
				900	opaque = chal.get('opaque', None)
				901	except KeyError:
				902	return None
				903
				904	H, KD = self.get_algorithm_impls(algorithm)
				905	if H is None:
				906	return None
				907
				908	user, pw = self.passwd.find_user_password(realm, req.get_full_url())
				909	if user is None:
				910	return None
				911
				912	# XXX not implemented yet
				913	if req.has_data():
				914	entdig = self.get_entity_digest(req.get_data(), chal)
				915	else:
				916	entdig = None
				917
				918	A1 = "%s:%s:%s" % (user, realm, pw)
				919	A2 = "%s:%s" % (req.get_method(),
				920	# XXX selector: what about proxies and full urls
				921	req.get_selector())
				922	if qop == 'auth':
				923	self.nonce_count += 1
				924	ncvalue = '%08x' % self.nonce_count
				925	cnonce = self.get_cnonce(nonce)
				926	noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
				927	respdig = KD(H(A1), noncebit)
				928	elif qop is None:
				929	respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
				930	else:
				931	# XXX handle auth-int.
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	932	raise URLError("qop '%s' is not supported." % qop)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	933
				934	# XXX should the partial digests be encoded too?
				935
				936	base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
				937	'response="%s"' % (user, realm, nonce, req.get_selector(),
				938	respdig)
				939	if opaque:
				940	base += ', opaque="%s"' % opaque
				941	if entdig:
				942	base += ', digest="%s"' % entdig
				943	base += ', algorithm="%s"' % algorithm
				944	if qop:
				945	base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
				946	return base
				947
				948	def get_algorithm_impls(self, algorithm):
				949	# lambdas assume digest modules are imported at the top level
				950	if algorithm == 'MD5':
				951	H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
				952	elif algorithm == 'SHA':
				953	H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
				954	# XXX MD5-sess
				955	KD = lambda s, d: H("%s:%s" % (s, d))
				956	return H, KD
				957
				958	def get_entity_digest(self, data, chal):
				959	# XXX not implemented yet
				960	return None
				961
				962
				963	class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
				964	"""An authentication protocol defined by RFC 2069
				965
				966	Digest authentication improves on basic authentication because it
				967	does not transmit passwords in the clear.
				968	"""
				969
				970	auth_header = 'Authorization'
				971	handler_order = 490 # before Basic auth
				972
				973	def http_error_401(self, req, fp, code, msg, headers):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	974	host = urlparse(req.get_full_url())[1]
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	975	retry = self.http_error_auth_reqed('www-authenticate',
				976	host, req, headers)
				977	self.reset_retry_count()
				978	return retry
				979
				980
				981	class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
				982
				983	auth_header = 'Proxy-Authorization'
				984	handler_order = 490 # before Basic auth
				985
				986	def http_error_407(self, req, fp, code, msg, headers):
				987	host = req.get_host()
				988	retry = self.http_error_auth_reqed('proxy-authenticate',
				989	host, req, headers)
				990	self.reset_retry_count()
				991	return retry
				992
				993	class AbstractHTTPHandler(BaseHandler):
				994
				995	def __init__(self, debuglevel=0):
				996	self._debuglevel = debuglevel
				997
				998	def set_http_debuglevel(self, level):
				999	self._debuglevel = level
				1000
				1001	def do_request_(self, request):
				1002	host = request.get_host()
				1003	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1004	raise URLError('no host given')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1005
				1006	if request.has_data(): # POST
				1007	data = request.get_data()
				1008	if not request.has_header('Content-type'):
				1009	request.add_unredirected_header(
				1010	'Content-type',
				1011	'application/x-www-form-urlencoded')
				1012	if not request.has_header('Content-length'):
				1013	request.add_unredirected_header(
				1014	'Content-length', '%d' % len(data))
				1015
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame^]	1016	sel_host = host
				1017	if request.has_proxy():
				1018	scheme, sel = splittype(request.get_selector())
				1019	sel_host, sel_path = splithost(sel)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1020	if not request.has_header('Host'):
Facundo Batista	72dc1ea	2008-08-16 14:44:32 +0000	[diff] [blame^]	1021	request.add_unredirected_header('Host', sel_host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1022	for name, value in self.parent.addheaders:
				1023	name = name.capitalize()
				1024	if not request.has_header(name):
				1025	request.add_unredirected_header(name, value)
				1026
				1027	return request
				1028
				1029	def do_open(self, http_class, req):
				1030	"""Return an addinfourl object for the request, using http_class.
				1031
				1032	http_class must implement the HTTPConnection API from http.client.
				1033	The addinfourl return value is a file-like object. It also
				1034	has methods and attributes including:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1035	- info(): return a email Message object for the headers
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1036	- geturl(): return the original request URL
				1037	- code: HTTP status code
				1038	"""
				1039	host = req.get_host()
				1040	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1041	raise URLError('no host given')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1042
				1043	h = http_class(host, timeout=req.timeout) # will parse host:port
				1044	headers = dict(req.headers)
				1045	headers.update(req.unredirected_hdrs)
				1046
				1047	# TODO(jhylton): Should this be redesigned to handle
				1048	# persistent connections?
				1049
				1050	# We want to make an HTTP/1.1 request, but the addinfourl
				1051	# class isn't prepared to deal with a persistent connection.
				1052	# It will try to read all remaining data from the socket,
				1053	# which will block while the server waits for the next request.
				1054	# So make sure the connection gets closed after the (only)
				1055	# request.
				1056	headers["Connection"] = "close"
				1057	headers = dict(
				1058	(name.title(), val) for name, val in headers.items())
				1059	try:
				1060	h.request(req.get_method(), req.get_selector(), req.data, headers)
				1061	r = h.getresponse()
				1062	except socket.error as err: # XXX what error?
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1063	raise URLError(err)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1064
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1065	resp = addinfourl(r.fp, r.msg, req.get_full_url())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1066	resp.code = r.status
				1067	resp.msg = r.reason
				1068	return resp
				1069
				1070
				1071	class HTTPHandler(AbstractHTTPHandler):
				1072
				1073	def http_open(self, req):
				1074	return self.do_open(http.client.HTTPConnection, req)
				1075
				1076	http_request = AbstractHTTPHandler.do_request_
				1077
				1078	if hasattr(http.client, 'HTTPSConnection'):
				1079	class HTTPSHandler(AbstractHTTPHandler):
				1080
				1081	def https_open(self, req):
				1082	return self.do_open(http.client.HTTPSConnection, req)
				1083
				1084	https_request = AbstractHTTPHandler.do_request_
				1085
				1086	class HTTPCookieProcessor(BaseHandler):
				1087	def __init__(self, cookiejar=None):
				1088	import http.cookiejar
				1089	if cookiejar is None:
				1090	cookiejar = http.cookiejar.CookieJar()
				1091	self.cookiejar = cookiejar
				1092
				1093	def http_request(self, request):
				1094	self.cookiejar.add_cookie_header(request)
				1095	return request
				1096
				1097	def http_response(self, request, response):
				1098	self.cookiejar.extract_cookies(response, request)
				1099	return response
				1100
				1101	https_request = http_request
				1102	https_response = http_response
				1103
				1104	class UnknownHandler(BaseHandler):
				1105	def unknown_open(self, req):
				1106	type = req.get_type()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1107	raise URLError('unknown url type: %s' % type)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1108
				1109	def parse_keqv_list(l):
				1110	"""Parse list of key=value strings where keys are not duplicated."""
				1111	parsed = {}
				1112	for elt in l:
				1113	k, v = elt.split('=', 1)
				1114	if v[0] == '"' and v[-1] == '"':
				1115	v = v[1:-1]
				1116	parsed[k] = v
				1117	return parsed
				1118
				1119	def parse_http_list(s):
				1120	"""Parse lists as described by RFC 2068 Section 2.
				1121
				1122	In particular, parse comma-separated lists where the elements of
				1123	the list may include quoted-strings. A quoted-string could
				1124	contain a comma. A non-quoted string could have quotes in the
				1125	middle. Neither commas nor quotes count if they are escaped.
				1126	Only double-quotes count, not single-quotes.
				1127	"""
				1128	res = []
				1129	part = ''
				1130
				1131	escape = quote = False
				1132	for cur in s:
				1133	if escape:
				1134	part += cur
				1135	escape = False
				1136	continue
				1137	if quote:
				1138	if cur == '\\':
				1139	escape = True
				1140	continue
				1141	elif cur == '"':
				1142	quote = False
				1143	part += cur
				1144	continue
				1145
				1146	if cur == ',':
				1147	res.append(part)
				1148	part = ''
				1149	continue
				1150
				1151	if cur == '"':
				1152	quote = True
				1153
				1154	part += cur
				1155
				1156	# append last part
				1157	if part:
				1158	res.append(part)
				1159
				1160	return [part.strip() for part in res]
				1161
				1162	class FileHandler(BaseHandler):
				1163	# Use local file or FTP depending on form of URL
				1164	def file_open(self, req):
				1165	url = req.get_selector()
				1166	if url[:2] == '//' and url[2:3] != '/':
				1167	req.type = 'ftp'
				1168	return self.parent.open(req)
				1169	else:
				1170	return self.open_local_file(req)
				1171
				1172	# names for the localhost
				1173	names = None
				1174	def get_names(self):
				1175	if FileHandler.names is None:
				1176	try:
				1177	FileHandler.names = (socket.gethostbyname('localhost'),
				1178	socket.gethostbyname(socket.gethostname()))
				1179	except socket.gaierror:
				1180	FileHandler.names = (socket.gethostbyname('localhost'),)
				1181	return FileHandler.names
				1182
				1183	# not entirely sure what the rules are here
				1184	def open_local_file(self, req):
				1185	import email.utils
				1186	import mimetypes
				1187	host = req.get_host()
				1188	file = req.get_selector()
				1189	localfile = url2pathname(file)
				1190	try:
				1191	stats = os.stat(localfile)
				1192	size = stats.st_size
				1193	modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
				1194	mtype = mimetypes.guess_type(file)[0]
				1195	headers = email.message_from_string(
				1196	'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
				1197	(mtype or 'text/plain', size, modified))
				1198	if host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1199	host, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1200	if not host or \
				1201	(not port and _safe_gethostbyname(host) in self.get_names()):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1202	return addinfourl(open(localfile, 'rb'), headers, 'file:'+file)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1203	except OSError as msg:
Georg Brandl	029986a	2008-06-23 11:44:14 +0000	[diff] [blame]	1204	# users shouldn't expect OSErrors coming from urlopen()
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1205	raise URLError(msg)
				1206	raise URLError('file not on local host')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1207
				1208	def _safe_gethostbyname(host):
				1209	try:
				1210	return socket.gethostbyname(host)
				1211	except socket.gaierror:
				1212	return None
				1213
				1214	class FTPHandler(BaseHandler):
				1215	def ftp_open(self, req):
				1216	import ftplib
				1217	import mimetypes
				1218	host = req.get_host()
				1219	if not host:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1220	raise URLError('ftp error: no host given')
				1221	host, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1222	if port is None:
				1223	port = ftplib.FTP_PORT
				1224	else:
				1225	port = int(port)
				1226
				1227	# username/password handling
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1228	user, host = splituser(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1229	if user:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1230	user, passwd = splitpasswd(user)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1231	else:
				1232	passwd = None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1233	host = unquote(host)
				1234	user = unquote(user or '')
				1235	passwd = unquote(passwd or '')
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1236
				1237	try:
				1238	host = socket.gethostbyname(host)
				1239	except socket.error as msg:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1240	raise URLError(msg)
				1241	path, attrs = splitattr(req.get_selector())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1242	dirs = path.split('/')
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1243	dirs = list(map(unquote, dirs))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1244	dirs, file = dirs[:-1], dirs[-1]
				1245	if dirs and not dirs[0]:
				1246	dirs = dirs[1:]
				1247	try:
				1248	fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
				1249	type = file and 'I' or 'D'
				1250	for attr in attrs:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1251	attr, value = splitvalue(attr)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1252	if attr.lower() == 'type' and \
				1253	value in ('a', 'A', 'i', 'I', 'd', 'D'):
				1254	type = value.upper()
				1255	fp, retrlen = fw.retrfile(file, type)
				1256	headers = ""
				1257	mtype = mimetypes.guess_type(req.get_full_url())[0]
				1258	if mtype:
				1259	headers += "Content-type: %s\n" % mtype
				1260	if retrlen is not None and retrlen >= 0:
				1261	headers += "Content-length: %d\n" % retrlen
				1262	headers = email.message_from_string(headers)
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1263	return addinfourl(fp, headers, req.get_full_url())
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1264	except ftplib.all_errors as msg:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1265	exc = URLError('ftp error: %s' % msg)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1266	raise exc.with_traceback(sys.exc_info()[2])
				1267
				1268	def connect_ftp(self, user, passwd, host, port, dirs, timeout):
				1269	fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
				1270	return fw
				1271
				1272	class CacheFTPHandler(FTPHandler):
				1273	# XXX would be nice to have pluggable cache strategies
				1274	# XXX this stuff is definitely not thread safe
				1275	def __init__(self):
				1276	self.cache = {}
				1277	self.timeout = {}
				1278	self.soonest = 0
				1279	self.delay = 60
				1280	self.max_conns = 16
				1281
				1282	def setTimeout(self, t):
				1283	self.delay = t
				1284
				1285	def setMaxConns(self, m):
				1286	self.max_conns = m
				1287
				1288	def connect_ftp(self, user, passwd, host, port, dirs, timeout):
				1289	key = user, host, port, '/'.join(dirs), timeout
				1290	if key in self.cache:
				1291	self.timeout[key] = time.time() + self.delay
				1292	else:
				1293	self.cache[key] = ftpwrapper(user, passwd, host, port,
				1294	dirs, timeout)
				1295	self.timeout[key] = time.time() + self.delay
				1296	self.check_cache()
				1297	return self.cache[key]
				1298
				1299	def check_cache(self):
				1300	# first check for old ones
				1301	t = time.time()
				1302	if self.soonest <= t:
				1303	for k, v in list(self.timeout.items()):
				1304	if v < t:
				1305	self.cache[k].close()
				1306	del self.cache[k]
				1307	del self.timeout[k]
				1308	self.soonest = min(list(self.timeout.values()))
				1309
				1310	# then check the size
				1311	if len(self.cache) == self.max_conns:
				1312	for k, v in list(self.timeout.items()):
				1313	if v == self.soonest:
				1314	del self.cache[k]
				1315	del self.timeout[k]
				1316	break
				1317	self.soonest = min(list(self.timeout.values()))
				1318
# Code moved from the old urllib module
				1320
				1321	MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
				1322
				1323	# Helper for non-unix systems
				1324	if os.name == 'mac':
				1325	from macurl2path import url2pathname, pathname2url
				1326	elif os.name == 'nt':
				1327	from nturl2path import url2pathname, pathname2url
				1328	else:
				1329	def url2pathname(pathname):
				1330	"""OS-specific conversion from a relative URL of the 'file' scheme
				1331	to a file system path; not recommended for general use."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1332	return unquote(pathname)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1333
				1334	def pathname2url(pathname):
				1335	"""OS-specific conversion from a file system path to a relative URL
				1336	of the 'file' scheme; not recommended for general use."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1337	return quote(pathname)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1338
				1339	# This really consists of two pieces:
				1340	# (1) a class which handles opening of all sorts of URLs
				1341	# (plus assorted utilities etc.)
				1342	# (2) a set of functions for parsing URLs
				1343	# XXX Should these be separated out into different modules?
				1344
				1345
				1346	ftpcache = {}
				1347	class URLopener:
				1348	"""Class to open URLs.
				1349	This is a class rather than just a subroutine because we may need
				1350	more than one set of global protocol-specific options.
				1351	Note -- this is a base class for those who don't want the
				1352	automatic handling of errors type 302 (relocated) and 401
				1353	(authorization needed)."""
				1354
				1355	__tempfiles = None
				1356
				1357	version = "Python-urllib/%s" % __version__
				1358
				1359	# Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies, optional key/cert file names and caches.

        *proxies* must be a mapping; when None, getproxies() supplies it.
        The keyword arguments 'key_file' and 'cert_file' are stored when
        given.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies

        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]

        # Temporary files created by retrieve(); removed by cleanup().
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()

        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.tempcache = None

        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.
        self.ftpcache = ftpcache
				1382
    def __del__(self):
        """Finalizer: delegate to close()."""
        self.close()
				1385
    def close(self):
        """Close the opener; this simply runs cleanup()."""
        self.cleanup()
				1388
    def cleanup(self):
        """Delete recorded temporary files and empty the temp cache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        tempfiles = self.__tempfiles
        if tempfiles:
            for path in tempfiles:
                try:
                    self.__unlink(path)
                except OSError:
                    # Best effort: a file that cannot be removed is skipped.
                    pass
            del tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
				1402
    def addheader(self, *args):
        """Append a header tuple for the HTTP interface.

        Example: u.addheader('Accept', 'sound/basic')
        The positional arguments are stored together as one tuple.
        """
        header = args
        self.addheaders.append(header)
				1407
				1408	# External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the method ``open_<type>`` ('-' in the type becomes
        '_'), via the proxy configured for that type when there is one.
        Unknown types go to open_unknown() / open_unknown_proxy().  A
        socket.error from the handler is re-raised as IOError.
        """
        fullurl = unwrap(to_bytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            # Serve the file cached by an earlier retrieve().
            filename, headers = self.tempcache[fullurl]
            return addinfourl(open(filename, 'rb'), headers, fullurl)

        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        proxy = None
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()

        method_name = ('open_' + urltype).replace('-', '_')
        self.type = urltype
        if not hasattr(self, method_name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            return self.open_unknown(fullurl, data)

        try:
            handler = getattr(self, method_name)
            if data is None:
                return handler(url)
            return handler(url, data)
        except socket.error as msg:
            raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
				1441
    def open_unknown(self, fullurl, data=None):
        """Overridable hook for a URL type without an open_<type> method.

        This implementation always raises IOError.
        """
        scheme, _rest = splittype(fullurl)
        raise IOError('url error', 'unknown url type', scheme)
				1446
    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable hook for a proxy whose type has no open_<type> method.

        This implementation always raises IOError.
        """
        scheme, _rest = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % scheme, proxy)
				1451
				1452	# External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        When *filename* is omitted and the URL is local, no copy is made.
        Otherwise the data is copied to *filename* (or to a temporary file
        that cleanup() removes later).  *reporthook*, if given, is called
        as reporthook(blocknum, blocksize, totalsize) once before the
        first block and after every block; totalsize is -1 when there is
        no Content-Length header.  Raises ContentTooShortError when fewer
        bytes arrive than Content-Length announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                # Close explicitly rather than relying on "del fp".
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not openable as a local file: fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Reuse the URL path's extension for the temporary file.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                # Read the announced size whether or not a reporthook was
                # supplied; otherwise the short-read check below could
                # never fire for callers that pass no hook.
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                # Close the output even if reading or the hook raised.
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
				1514
				1515	# Each method named open_<type> knows how to open that type of URL
				1516
    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a (host, relative-path) pair.
        - data is payload for a POST request or None.

        Returns an addinfourl wrapping the response for any 2xx status;
        every other status is passed on to self.http_error().  Raises
        IOError if no host can be determined and URLError when the server
        sends a bad status line.
        """
        import base64

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # A (host, selector) pair: host is the proxy to talk to.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd@ part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host:
            raise IOError('http error', 'no host given')

        # base64.b64encode() only accepts bytes; encode the credentials
        # first and turn the result back into str for the header value.
        if proxy_passwd:
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None

        http_conn = connection_factory(host)
        # XXX We should fix urllib so that it works with HTTP/1.1.
        http_conn._http_vsn = 10
        http_conn._http_vsn_str = "HTTP/1.0"

        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost
        # Caller-supplied headers are applied last so they win on conflict.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response.fp, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
				1607
    def open_http(self, url, data=None):
        """Open url over plain HTTP, sending data as a POST body if given."""
        factory = http.client.HTTPConnection
        return self._open_generic_http(factory, url, data)
				1611
    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Dispatch an HTTP error response to the matching handler.

        A subclass may override this method, or define methods named
        http_error_DDD (DDD being the 3-digit status code).  If such a
        method exists it is tried first; when it is missing or returns a
        false value, http_error_default() handles the response."""
        missing = object()
        handler = getattr(self, 'http_error_%d' % errcode, missing)
        if handler is not missing:
            # data is only forwarded when the request actually carried some.
            extra = () if data is None else (data,)
            outcome = handler(url, fp, errcode, errmsg, headers, *extra)
            if outcome:
                return outcome
        return self.http_error_default(url, fp, errcode, errmsg, headers)
				1627
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Fallback error handler: drain and close fp, then raise HTTPError."""
        fp.read()  # consume the rest of the response; the content is discarded
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1633
    # The HTTPS methods only exist when _have_ssl is true (the flag is set
    # outside this section; presumably it records whether ssl imported).
    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection to host using self.key_file/self.cert_file."""
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            # Same flow as open_http(), but with the HTTPS connection factory.
            return self._open_generic_http(self._https_connection, url, data)
				1643
    def open_file(self, url):
        """Open a file: URL, choosing between the local file and FTP openers.

        A URL of the form //host/... whose host part is neither empty nor
        'localhost' is handed to open_ftp(); everything else goes to
        open_local_file().  Raises URLError if url is not a string."""
        if not isinstance(url, str):
            raise URLError('file error', 'proxy support for file protocol currently not implemented')
        names_remote_host = (url.startswith('//')
                             and url[2:3] != '/'
                             and url[2:12].lower() != 'localhost/')
        opener = self.open_ftp if names_remote_host else self.open_local_file
        return opener(url)
				1652
    def open_local_file(self, url):
        """Open a file on the local machine and return it as an addinfourl.

        The response headers carry a guessed Content-Type (text/plain when
        unknown), the file size and its modification time.  Raises URLError
        if the file cannot be stat'ed or the URL names a non-local host."""
        import email.utils
        import mimetypes

        host, path = splithost(url)
        localname = url2pathname(path)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.errno, e.strerror, e.filename)
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        content_type = mimetypes.guess_type(url)[0] or 'text/plain'
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (content_type, stats.st_size, modified))

        # No host at all counts as local; otherwise the host must resolve
        # to this machine and must not carry an explicit port.
        is_local = not host
        if not is_local:
            host, port = splitport(host)
            is_local = (not port and
                        socket.gethostbyname(host) in (localhost(), thishost()))
        if not is_local:
            raise URLError('local file error', 'not on local host')

        urlfile = 'file://' + path if path[:1] == '/' else path
        return addinfourl(open(localname, 'rb'), headers, urlfile)
				1682
    def open_ftp(self, url):
        """Use FTP protocol.

        url is the part of the URL after 'ftp:' (//[user[:passwd]@]host[:port]/path
        with optional ;attr=value suffixes).  Connections are kept in
        self.ftpcache, keyed by (user, host, port, directory).  A ;type=
        attribute of a, i or d (either case) selects the transfer type;
        otherwise 'D' (directory listing) is used when the path has no file
        component and 'I' (binary) when it has one.

        Returns an addinfourl object.  Raises URLError if url is not a
        string, has no host, or an FTP error occurs.
        """
        if not isinstance(url, str):
            raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host:
            raise URLError('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the leading '/'; a second
        # empty component (path began with '//') stands for the root dir.
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        if dirs and not dirs[0]:
            dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a snapshot
            # of the keys: deleting from a dict while iterating over it
            # raises RuntimeError.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file:
                type = 'D'
            else:
                type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as msg:
            raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
				1741
    def open_data(self, url, data=None):
        """Use "data" URL.

        url is the part after 'data:'.  The data argument (POSTed data) is
        ignored.  Returns an addinfourl whose body is a synthetic message:
        Date, Content-type and Content-Length headers followed by the
        decoded payload.  Raises URLError if url is not a string and
        IOError if it contains no comma.
        """
        if not isinstance(url, str):
            raise URLError('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without '=' is the encoding, not a parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        # %H:%M:%S instead of %T: %T is not supported by every platform's
        # strftime().
        msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                              time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            # The base64 codec works on bytes; decode to str via latin-1 so
            # every byte value maps to exactly one character.
            data = base64.b64decode(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1782
				1783
				1784	class FancyURLopener(URLopener):
				1785	"""Derived class with handlers for errors we can handle (perhaps)."""
				1786
				1787	def __init__(self, args, *kwargs):
				1788	URLopener.__init__(self, args, *kwargs)
				1789	self.auth_cache = {}
				1790	self.tries = 0
				1791	self.maxtries = 10
				1792
				1793	def http_error_default(self, url, fp, errcode, errmsg, headers):
				1794	"""Default error handling -- don't raise an exception."""
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1795	return addinfourl(fp, headers, "http:" + url, errcode)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1796
				1797	def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
				1798	"""Error 302 -- relocated (temporarily)."""
				1799	self.tries += 1
				1800	if self.maxtries and self.tries >= self.maxtries:
				1801	if hasattr(self, "http_error_500"):
				1802	meth = self.http_error_500
				1803	else:
				1804	meth = self.http_error_default
				1805	self.tries = 0
				1806	return meth(url, fp, 500,
				1807	"Internal Server Error: Redirect Recursion", headers)
				1808	result = self.redirect_internal(url, fp, errcode, errmsg, headers,
				1809	data)
				1810	self.tries = 0
				1811	return result
				1812
				1813	def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
				1814	if 'location' in headers:
				1815	newurl = headers['location']
				1816	elif 'uri' in headers:
				1817	newurl = headers['uri']
				1818	else:
				1819	return
				1820	void = fp.read()
				1821	fp.close()
				1822	# In case the server sent a relative URL, join with original:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1823	newurl = urljoin(self.type + ":" + url, newurl)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1824	return self.open(newurl)
				1825
				1826	def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
				1827	"""Error 301 -- also relocated (permanently)."""
				1828	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1829
				1830	def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
				1831	"""Error 303 -- also relocated (essentially identical to 302)."""
				1832	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1833
				1834	def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
				1835	"""Error 307 -- relocated, but turn POST into error."""
				1836	if data is None:
				1837	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
				1838	else:
				1839	return self.http_error_default(url, fp, errcode, errmsg, headers)
				1840
				1841	def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
				1842	"""Error 401 -- authentication required.
				1843	This function supports Basic authentication only."""
				1844	if not 'www-authenticate' in headers:
				1845	URLopener.http_error_default(self, url, fp,
				1846	errcode, errmsg, headers)
				1847	stuff = headers['www-authenticate']
				1848	import re
				1849	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
				1850	if not match:
				1851	URLopener.http_error_default(self, url, fp,
				1852	errcode, errmsg, headers)
				1853	scheme, realm = match.groups()
				1854	if scheme.lower() != 'basic':
				1855	URLopener.http_error_default(self, url, fp,
				1856	errcode, errmsg, headers)
				1857	name = 'retry_' + self.type + '_basic_auth'
				1858	if data is None:
				1859	return getattr(self,name)(url, realm)
				1860	else:
				1861	return getattr(self,name)(url, realm, data)
				1862
				1863	def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
				1864	"""Error 407 -- proxy authentication required.
				1865	This function supports Basic authentication only."""
				1866	if not 'proxy-authenticate' in headers:
				1867	URLopener.http_error_default(self, url, fp,
				1868	errcode, errmsg, headers)
				1869	stuff = headers['proxy-authenticate']
				1870	import re
				1871	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
				1872	if not match:
				1873	URLopener.http_error_default(self, url, fp,
				1874	errcode, errmsg, headers)
				1875	scheme, realm = match.groups()
				1876	if scheme.lower() != 'basic':
				1877	URLopener.http_error_default(self, url, fp,
				1878	errcode, errmsg, headers)
				1879	name = 'retry_proxy_' + self.type + '_basic_auth'
				1880	if data is None:
				1881	return getattr(self,name)(url, realm)
				1882	else:
				1883	return getattr(self,name)(url, realm, data)
				1884
				1885	def retry_proxy_http_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1886	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1887	newurl = 'http://' + host + selector
				1888	proxy = self.proxies['http']
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1889	urltype, proxyhost = splittype(proxy)
				1890	proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1891	i = proxyhost.find('@') + 1
				1892	proxyhost = proxyhost[i:]
				1893	user, passwd = self.get_user_passwd(proxyhost, realm, i)
				1894	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1895	proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1896	quote(passwd, safe=''), proxyhost)
				1897	self.proxies['http'] = 'http://' + proxyhost + proxyselector
				1898	if data is None:
				1899	return self.open(newurl)
				1900	else:
				1901	return self.open(newurl, data)
				1902
				1903	def retry_proxy_https_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1904	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1905	newurl = 'https://' + host + selector
				1906	proxy = self.proxies['https']
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1907	urltype, proxyhost = splittype(proxy)
				1908	proxyhost, proxyselector = splithost(proxyhost)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1909	i = proxyhost.find('@') + 1
				1910	proxyhost = proxyhost[i:]
				1911	user, passwd = self.get_user_passwd(proxyhost, realm, i)
				1912	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1913	proxyhost = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1914	quote(passwd, safe=''), proxyhost)
				1915	self.proxies['https'] = 'https://' + proxyhost + proxyselector
				1916	if data is None:
				1917	return self.open(newurl)
				1918	else:
				1919	return self.open(newurl, data)
				1920
				1921	def retry_http_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1922	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1923	i = host.find('@') + 1
				1924	host = host[i:]
				1925	user, passwd = self.get_user_passwd(host, realm, i)
				1926	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1927	host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1928	quote(passwd, safe=''), host)
				1929	newurl = 'http://' + host + selector
				1930	if data is None:
				1931	return self.open(newurl)
				1932	else:
				1933	return self.open(newurl, data)
				1934
				1935	def retry_https_basic_auth(self, url, realm, data=None):
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1936	host, selector = splithost(url)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1937	i = host.find('@') + 1
				1938	host = host[i:]
				1939	user, passwd = self.get_user_passwd(host, realm, i)
				1940	if not (user or passwd): return None
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	1941	host = "%s:%s@%s" % (quote(user, safe=''),
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1942	quote(passwd, safe=''), host)
				1943	newurl = 'https://' + host + selector
				1944	if data is None:
				1945	return self.open(newurl)
				1946	else:
				1947	return self.open(newurl, data)
				1948
				1949	def get_user_passwd(self, host, realm, clear_cache = 0):
				1950	key = realm + '@' + host.lower()
				1951	if key in self.auth_cache:
				1952	if clear_cache:
				1953	del self.auth_cache[key]
				1954	else:
				1955	return self.auth_cache[key]
				1956	user, passwd = self.prompt_user_passwd(host, realm)
				1957	if user or passwd: self.auth_cache[key] = (user, passwd)
				1958	return user, passwd
				1959
				1960	def prompt_user_passwd(self, host, realm):
				1961	"""Override this in a GUI environment!"""
				1962	import getpass
				1963	try:
				1964	user = input("Enter username for %s at %s: " % (realm, host))
				1965	passwd = getpass.getpass("Enter password for %s in %s at %s: " %
				1966	(user, realm, host))
				1967	return user, passwd
				1968	except KeyboardInterrupt:
				1969	print()
				1970	return None, None
				1971
				1972
				1973	# Utility functions
				1974
				1975	_localhost = None
				1976	def localhost():
				1977	"""Return the IP address of the magic hostname 'localhost'."""
				1978	global _localhost
				1979	if _localhost is None:
				1980	_localhost = socket.gethostbyname('localhost')
				1981	return _localhost
				1982
				1983	_thishost = None
				1984	def thishost():
				1985	"""Return the IP address of the current host."""
				1986	global _thishost
				1987	if _thishost is None:
				1988	_thishost = socket.gethostbyname(socket.gethostname())
				1989	return _thishost
				1990
				1991	_ftperrors = None
				1992	def ftperrors():
				1993	"""Return the set of errors raised by the FTP class."""
				1994	global _ftperrors
				1995	if _ftperrors is None:
				1996	import ftplib
				1997	_ftperrors = ftplib.all_errors
				1998	return _ftperrors
				1999
				2000	_noheaders = None
				2001	def noheaders():
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2002	"""Return an empty email Message object."""
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2003	global _noheaders
				2004	if _noheaders is None:
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2005	_noheaders = email.message_from_string("")
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2006	return _noheaders
				2007
				2008
				2009	# Utility classes
				2010
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        """Remember the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in and change into each directory of self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file (or a directory listing).

        type is the FTP transfer type; 'd'/'D' requests a listing.
        Returns (file object, length), where length is whatever
        ntransfercmd() reported.  Closing the returned file object calls
        endtransfer().
        """
        import ftplib
        # Finish any transfer that is still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed (the cached connection may have gone
            # stale); reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing
                # below; any other permanent error is reported.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error', reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error', reason) from reason
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])

    def endtransfer(self):
        """Read the final response of a pending transfer, ignoring FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Finish any pending transfer and close the connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
				2087
				2088	# Proxy handling
				2089	def getproxies_environment():
				2090	"""Return a dictionary of scheme -> proxy server URL mappings.
				2091
				2092	Scan the environment for variables named <scheme>_proxy;
				2093	this seems to be the standard convention. If you need a
				2094	different way, you can pass a proxies dictionary to the
				2095	[Fancy]URLopener constructor.
				2096
				2097	"""
				2098	proxies = {}
				2099	for name, value in os.environ.items():
				2100	name = name.lower()
				2101	if name == 'no_proxy':
				2102	# handled in proxy_bypass_environment
				2103	continue
				2104	if value and name[-6:] == '_proxy':
				2105	proxies[name[:-6]] = value
				2106	return proxies
				2107
				2108	def proxy_bypass_environment(host):
				2109	"""Test if proxies should not be used for a particular host.
				2110
				2111	Checks the environment for a variable named no_proxy, which should
				2112	be a list of DNS suffixes separated by commas, or '*' for all hosts.
				2113	"""
				2114	no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
				2115	# '*' is special case for always bypass
				2116	if no_proxy == '*':
				2117	return 1
				2118	# strip port off host
Georg Brandl	13e8946	2008-07-01 19:56:00 +0000	[diff] [blame]	2119	hostonly, port = splitport(host)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	2120	# check if the host ends with any of the DNS suffixes
				2121	for name in no_proxy.split(','):
				2122	if name and (hostonly.endswith(name) or host.endswith(name)):
				2123	return 1
				2124	# otherwise, don't bypass
				2125	return 0
				2126
				2127
				2128	if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        The settings are read through the Internet Config ('ic') module;
        only the HTTP proxy (UseHTTPProxy / HTTPProxyHost keys) is
        looked up.  An empty dictionary is returned when ic is not
        available or cannot be queried.

        """
        try:
            import ic
        except ImportError:
            return {}
        try:
            settings = ic.IC()
        except ic.error:
            return {}

        # HTTP:
        http_enabled = 'UseHTTPProxy' in settings and settings['UseHTTPProxy']
        if not http_enabled:
            return {}
        try:
            http_host = settings['HTTPProxyHost']
        except ic.error:
            return {}
        # FTP: XXX To be done.
        # Gopher: XXX To be done.
        return {'http': 'http://%s' % http_host}
				2158
    def proxy_bypass(host):
        """Return a true value if host should be reached without a proxy.

        The no_proxy check is only made when proxies are configured in
        the environment; otherwise the answer is always 0.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return 0
				2164
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Environment settings win; Internet Config is consulted only when
        the environment defines no proxies.
        """
        return getproxies_environment() or getproxies_internetconfig()
				2167
				2168	elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  The ProxyServer value
        is either a single server used for all protocols or a
        ';'-separated list of protocol=address pairs.  An empty (or
        partially filled) dictionary is returned when the registry
        cannot be read or holds a value in an unexpected format.

        """
        proxies = {}
        try:
            # The module is called winreg on Python 3; _winreg is the
            # Python 2 name.
            try:
                import winreg
            except ImportError:
                import _winreg as winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        import re
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                          'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the registry handle even when parsing fails.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.  Keep whatever was collected so far.
            pass
        return proxies
				2213
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        # The registry is only consulted when the environment yields nothing.
        return getproxies_environment() or getproxies_registry()
				2222
    def proxy_bypass_registry(host):
        """Return 1 if the registry's ProxyOverride list exempts host, else 0.

        host may carry a port, which is ignored.  The host name, its IP
        address and its fully qualified name are each matched
        (case-insensitively, anchored at the start) against every
        ';'-separated ProxyOverride entry; entries may use the glob
        characters '*' and '?', and '<local>' stands for this machine.
        Returns 0 when the registry cannot be read, proxies are
        disabled, or no override list is set.
        """
        try:
            import re
            # The module is called winreg on Python 3; _winreg is the
            # Python 2 name.
            try:
                import winreg
            except ImportError:
                import _winreg as winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                        'ProxyOverride')[0])
            finally:
                # Release the registry handle whether or not the reads worked.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if not test:
                # An empty entry (e.g. from a trailing ';') would turn into
                # an empty pattern, which matches every host.
                continue
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0
				2281
    def proxy_bypass(host):
        """Return a true value if host should be reached without a proxy.

        Uses the no_proxy environment check when proxies are configured
        in the environment, and the registry's override list otherwise.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
				2293
				2294	else:
    # Neither Mac OS X ('darwin') nor Windows ('nt'): there is no
    # platform-specific proxy store, so use environment variables only.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment