blob: 4c83bfc780fe56128ac4cb09ed5ffaced864b3ab [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the URL and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  It accepts one or more Handlers
as arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows that the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000090import base64
91import ftplib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000092import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000093import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000094import md5
95import mimetypes
96import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000097import os
98import posixpath
99import random
100import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000101import sha
102import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000103import sys
104import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000105import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000106import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000107import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000108
109try:
110 from cStringIO import StringIO
111except ImportError:
112 from StringIO import StringIO
113
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000114# not sure how many of these need to be gotten rid of
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000115from urllib import (unwrap, unquote, splittype, splithost,
116 addinfourl, splitport, splitgophertype, splitquery,
117 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000118
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000119# support for FileHandler, proxies via environment variables
120from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000121
Jeremy Hylton023518a2003-12-17 18:52:16 +0000122__version__ = "2.4"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000123
# Module-wide opener used by urlopen(); created lazily or set through
# install_opener().
_opener = None
def urlopen(url, data=None):
    """Open url with the module-wide opener and return what it returns.

    The shared opener is built with build_opener() the first time it is
    needed, unless one has already been stored in the _opener global.
    Both url and data are handed to the opener's open() method unchanged.
    """
    global _opener
    opener = _opener
    if opener is None:
        # First use: create the default opener and remember it.
        opener = _opener = build_opener()
    return opener.open(url, data)
130
def install_opener(opener):
    """Store opener in the module-level _opener global.

    urlopen() uses whatever object is stored there, so after this call
    urlopen() delegates to opener.open().
    """
    global _opener
    _opener = opener
134
135# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000136# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000137# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000138
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of the
    implementation, so __init__ and __str__ are overridden.  It sets
    self.args for compatibility with other EnvironmentError subclasses,
    but args doesn't have the typical format with errno in slot 0 and
    strerror in slot 1.  This may be better than nothing.

    Attributes:
        reason -- the object passed to the constructor (a message or any
                  other value describing the failure).
    """

    def __init__(self, reason):
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        # Wrap the reason in a 1-tuple: formatting a tuple-valued reason
        # directly with '%' would treat it as the argument list and either
        # raise TypeError or silently unpack it.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000151
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        """Record the error details; url is stored as self.filename."""
        self.code, self.msg, self.hdrs = code, msg, hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url)

    def __str__(self):
        """Return 'HTTP Error <code>: <msg>'."""
        return 'HTTP Error %s: %s' % (self.code, self.msg)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000171
class GopherError(URLError):
    """URLError subclass with no behaviour of its own."""
174
Moshe Zadka8a18e992001-03-01 08:40:42 +0000175
class Request:
    """Encapsulate the state of a single URL request.

    A Request holds the URL, optional data, and two header dictionaries:
    `headers` and `unredirected_hdrs` (headers that will not be added to
    a redirected request).  The URL is split lazily: get_type() and
    get_host() compute and cache their pieces on first use.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        """Create a request for url.

        url -- the URL, optionally wrapped as '<URL:...>'
        data -- request data; when not None get_method() returns "POST"
        headers -- optional mapping of header name to value; None (the
                   default) means no extra headers.  Keys are stored
                   capitalized via add_header().
        origin_req_host -- defaults to cookielib.request_host(self)
        unverifiable -- stored and returned by is_unverifiable()
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # None replaces the former mutable {} default; a fresh dict is
        # always created above, the argument is only read.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # Calling get_<name>() is expected to set the attribute.
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" if the request has data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        """Return the (unwrapped) URL given to the constructor."""
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError if there is none."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what remains of the URL after the host is split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request to a proxy at host using scheme type.

        The selector becomes the full original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return true if header_name is a key in either header dict.

        The name is looked up as given (it is not capitalized here).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Look header_name up in headers, then unredirected_hdrs."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs; `headers` wins on duplicates."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000276
class OpenerDirector:
    """Dispatch requests to the handlers registered with add_handler().

    Handlers are indexed by the names of their methods:
      <protocol>_open         -> self.handle_open[protocol]
      <protocol>_request      -> self.process_request[protocol]
      <protocol>_response     -> self.process_response[protocol]
      <protocol>_error_<kind> -> self.handle_error[protocol][kind]
    where <kind> is converted to an int when it is numeric.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every hook its method names describe.

        A handler with at least one recognised method is also inserted
        (in sorted order) into self.handlers and told about this opener
        through handler.add_parent(self).
        """
        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # Helper methods whose names only coincidentally look
                # like <protocol>_<condition> hooks.
                continue

            i = meth.find("_")
            if i < 0:
                # No underscore: not a hook.  Without this check i == -1
                # would slice e.g. "open" into protocol "ope" and
                # condition "open".
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] in turn.

        Returns the first result that is not None, or None if every
        handler declined (or there are no handlers for kind).
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the response.

        The request is passed through the <protocol>_request processors,
        opened with _open(), and the response is passed through the
        <protocol>_response processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the default_open, <protocol>_open and unknown_open chains."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https' the third positional argument is the
        status code; http_error_<code> handlers are tried first and the
        http_error_default chain is the fallback.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000405
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000406# XXX probably also want an abstract factory that knows when it makes
407# sense to skip a superclass in favor of a subclass and when it might
408# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000409
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.  Returns the OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Keep only the defaults that no argument overrides.  Each default is
    # decided exactly once, so several arguments deriving from the same
    # default class cannot cause it to be removed twice.
    remaining = []
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    break
            elif isinstance(check, klass):
                break
        else:
            remaining.append(klass)

    for klass in remaining:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
445
class BaseHandler:
    """Common base for handlers: parent bookkeeping and ordering."""

    # Sort key used by __lt__; smaller values sort first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember parent as self.parent."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by their handler_order attribute."""
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000463
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000464
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return response for codes 200 and 206; otherwise hand the
        response to self.parent.error() and return its result."""
        code = response.code
        msg = response.msg
        hdrs = response.info()

        if code in (200, 206):
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
479
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000483
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Prefix added to the message of the HTTPError raised on a loop.
    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        get_or_head = (code in (301, 302, 303, 307)
                       and method in ("GET", "HEAD"))
        # Strictly (according to RFC 2616), 301 or 302 in response
        # to a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we
        # do the same.
        post = code in (301, 302, 303) and method == "POST"
        if not (get_or_head or post):
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
        return Request(newurl,
                       headers=req.headers,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when there is no such header or when
        redirect_request() declines; raises HTTPError when a redirect
        loop is detected; otherwise returns self.parent.open(new).
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers.getheaders(header_name)[0]
                break
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if not hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict = {}
        else:
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000563
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as {scheme: proxy URL}."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install a <scheme>_open method for every entry in proxies.

        proxies -- mapping of URL scheme to proxy URL; when None the
                   mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Default arguments bind the current url/type to each lambda.
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy and add Basic proxy credentials if present.

        Returns None when the proxy uses the same scheme as the request
        (so later handlers open it), otherwise re-opens the request
        through self.parent.
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        if not type or r_type.isdigit():
            # proxy is specified without protocol
            type = orig_type
            host = proxy
        else:
            host, r_host = splithost(r_type)
        user_pass, host = splituser(host)
        # Only parse credentials when a user-info part is present; a
        # proxy such as "http://host:3128" has none (presumably reported
        # as None by splituser -- verify), and must not be handed to
        # splitpasswd.
        if user_pass:
            user, password = splitpasswd(user_pass)
            if user and password:
                creds = '%s:%s' % (unquote(user), unquote(password))
                user_pass = base64.encodestring(creds).strip()
                req.add_header('Proxy-authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000605
606# feature suggested by Duncan Booth
607# XXX custom is not a good name
class CustomProxy:
    """Pair a proxy address with an optional predicate on requests."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if self.func is set and accepts req, else None."""
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000621
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from registered CustomProxy objects.

    self.proxies maps a protocol name to the list of proxy objects
    registered for it, in registration order.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        """Register every proxy object passed to the constructor."""
        self.proxies = {}
        # Previously the arguments were silently discarded.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        protocol or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Request.set_proxy() takes (host, type); the request's
                # own protocol is passed as the type.
                # NOTE(review): confirm that keeping the request's
                # protocol is the intended proxy type.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Append cpo to the list of proxies for cpo.proto."""
        if cpo.proto in self.proxies:
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000649
class HTTPPasswordMgr:
    """Store (user, password) pairs by realm and reduced URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, passwd)},
    where a reduced URI is the (netloc, path) pair from reduce_uri().
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm and one or more URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if realm not in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for realm whose URI is
        equal to or above authuri, or (None, None) if there is none."""
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: "/foo" is above "/foo/bar" but
        # not above "/foobar", which a plain character-wise common
        # prefix would wrongly accept.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000693
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000694
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm ``None``.

    Credentials registered under realm None act as defaults used when
    the requested realm has no matching entry.
    """

    def find_user_password(self, realm, authuri):
        """Look up authuri in realm first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
703
704
class AbstractBasicAuthHandler:
    """Mixin implementing the HTTP Basic authentication retry logic.

    Concrete subclasses supply ``auth_header`` (the request header that
    carries the credentials) and are expected to provide ``parent``.
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq is the name of the challenge header to read from headers.
        Returns the retried response, or None when the header is absent,
        unparsable, or names a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with an added Basic credentials header.

        Returns None when no password is known for the realm/URL, or when
        the very same credentials were already sent with this request.
        """
        # TODO(jhylton): Remove the host argument? It depends on whether
        # retry_http_basic_auth() is considered part of the public API.
        # It probably is.
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        # b64encode() never inserts line breaks; encodestring() wraps its
        # output every 76 characters, which put a newline in the middle of
        # the header value for long user/password combinations.
        auth = 'Basic %s' % base64.b64encode(raw).strip()
        if req.headers.get(self.auth_header, None) == auth:
            # Same credentials already failed once; do not loop.
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
743
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from a server using Basic authentication."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry the request using the 'www-authenticate' challenge."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        challenge_header = 'www-authenticate'
        return self.http_error_auth_reqed(challenge_header, netloc,
                                          req, headers)
752
753
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from a proxy using Basic authentication."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry the request using the 'proxy-authenticate' challenge."""
        proxy_host = req.get_host()
        challenge_header = 'proxy-authenticate'
        return self.http_error_auth_reqed(challenge_header, proxy_host,
                                          req, headers)
762
763
def randombytes(n):
    """Return n random bytes."""
    # Ask the operating system first.  os.urandom() reads the platform's
    # random source in binary mode and releases it itself, unlike the
    # former hand-rolled text-mode open() of /dev/urandom.  Fall back to
    # the random module when the platform offers no such source.
    try:
        return os.urandom(n)
    except NotImplementedError:
        return "".join([chr(random.randrange(0, 256)) for i in range(n)])
777
class AbstractDigestAuthHandler:
    """Mixin implementing the HTTP Digest authentication retry logic.

    Concrete subclasses supply ``auth_header`` (the request header that
    carries the credentials) and are expected to provide ``parent``.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" support is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget previous failed attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req with Digest credentials if the challenge asks for them.

        auth_header is the name of the challenge header to read from
        headers.  Raises HTTPError after more than five retries and
        ValueError when the challenge names a scheme other than Digest.
        Returns None when the challenge header is missing or empty.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Re-open req with credentials computed from the challenge auth.

        Returns None when no credentials could be computed or when the
        identical header value was already sent with this request.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hexadecimal client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials string for req.

        chal is the parsed challenge as a dict.  Returns None when the
        challenge lacks a realm or nonce, names an unknown algorithm or
        an unsupported qop, or when no user is known for the realm/URL.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop is None:
            use_auth_qop = False
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer a comma-separated list of qop options;
            # 'auth' is the only one implemented here.
            use_auth_qop = True
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            # Decline instead of failing later on unbound local names.
            return None

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if use_auth_qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        Returns (None, None) for an algorithm that is not implemented, so
        that callers can test ``H is None``.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Return the entity digest for data; currently always None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000911
Moshe Zadka8a18e992001-03-01 08:40:42 +0000912
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 via the 'www-authenticate' challenge.

        The retry counter is reset once the attempt returns normally.
        """
        netloc = urlparse.urlparse(req.get_full_url())[1]
        outcome = self.http_error_auth_reqed('www-authenticate',
                                             netloc, req, headers)
        self.reset_retry_count()
        return outcome
927 return retry
Moshe Zadka8a18e992001-03-01 08:40:42 +0000928
929
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from a proxy using Digest authentication."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 via the 'proxy-authenticate' challenge.

        The retry counter is reset once the attempt returns normally.
        """
        proxy_host = req.get_host()
        outcome = self.http_error_auth_reqed('proxy-authenticate',
                                             proxy_host, req, headers)
        self.reset_retry_count()
        return outcome
Tim Peterse1190062001-01-15 03:34:38 +0000940
Moshe Zadka8a18e992001-03-01 08:40:42 +0000941class AbstractHTTPHandler(BaseHandler):
942
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000943 def __init__(self, debuglevel=0):
944 self._debuglevel = debuglevel
945
946 def set_http_debuglevel(self, level):
947 self._debuglevel = level
948
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000949 def do_request_(self, request):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000950 host = request.get_host()
951 if not host:
952 raise URLError('no host given')
953
954 if request.has_data(): # POST
955 data = request.get_data()
956 if not request.has_header('Content-type'):
957 request.add_unredirected_header(
958 'Content-type',
959 'application/x-www-form-urlencoded')
960 if not request.has_header('Content-length'):
961 request.add_unredirected_header(
962 'Content-length', '%d' % len(data))
963
964 scheme, sel = splittype(request.get_selector())
965 sel_host, sel_path = splithost(sel)
966 if not request.has_header('Host'):
967 request.add_unredirected_header('Host', sel_host or host)
968 for name, value in self.parent.addheaders:
969 name = name.capitalize()
970 if not request.has_header(name):
971 request.add_unredirected_header(name, value)
972
973 return request
974
Moshe Zadka8a18e992001-03-01 08:40:42 +0000975 def do_open(self, http_class, req):
Jeremy Hylton023518a2003-12-17 18:52:16 +0000976 """Return an addinfourl object for the request, using http_class.
977
978 http_class must implement the HTTPConnection API from httplib.
979 The addinfourl return value is a file-like object. It also
980 has methods and attributes including:
981 - info(): return a mimetools.Message object for the headers
982 - geturl(): return the original request URL
983 - code: HTTP status code
984 """
Moshe Zadka76676802001-04-11 07:44:53 +0000985 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000986 if not host:
987 raise URLError('no host given')
988
Jeremy Hylton828023b2003-05-04 23:44:49 +0000989 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000990 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +0000991
Jeremy Hylton023518a2003-12-17 18:52:16 +0000992 headers = dict(req.headers)
993 headers.update(req.unredirected_hdrs)
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000994 # We want to make an HTTP/1.1 request, but the addinfourl
995 # class isn't prepared to deal with a persistent connection.
996 # It will try to read all remaining data from the socket,
997 # which will block while the server waits for the next request.
998 # So make sure the connection gets closed after the (only)
999 # request.
1000 headers["Connection"] = "close"
Jeremy Hylton828023b2003-05-04 23:44:49 +00001001 try:
Jeremy Hylton023518a2003-12-17 18:52:16 +00001002 h.request(req.get_method(), req.get_selector(), req.data, headers)
1003 r = h.getresponse()
1004 except socket.error, err: # XXX what error?
Jeremy Hylton828023b2003-05-04 23:44:49 +00001005 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001006
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001007 # Pick apart the HTTPResponse object to get the addinfourl
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001008 # object initialized properly.
1009
1010 # Wrap the HTTPResponse object in socket's file object adapter
1011 # for Windows. That adapter calls recv(), so delegate recv()
1012 # to read(). This weird wrapping allows the returned object to
1013 # have readline() and readlines() methods.
Tim Peters9ca3f852004-08-08 01:05:14 +00001014
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001015 # XXX It might be better to extract the read buffering code
1016 # out of socket._fileobject() and into a base class.
Tim Peters9ca3f852004-08-08 01:05:14 +00001017
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001018 r.recv = r.read
1019 fp = socket._fileobject(r)
Tim Peters9ca3f852004-08-08 01:05:14 +00001020
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001021 resp = addinfourl(fp, r.msg, req.get_full_url())
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001022 resp.code = r.status
1023 resp.msg = r.reason
1024 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001025
Moshe Zadka8a18e992001-03-01 08:40:42 +00001026
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs over httplib.HTTPConnection."""

    # Request pre-processing is inherited unchanged.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send req and return the response object."""
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001033
# HTTPSHandler is only defined when httplib exposes an HTTPS attribute.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs over httplib.HTTPSConnection."""

        # Request pre-processing is inherited unchanged.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Send req and return the response object."""
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1041
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        # Use the caller's jar when given, otherwise a fresh CookieJar.
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = cookielib.CookieJar()

    def http_request(self, request):
        """Add the jar's cookie header to request and return request."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Store cookies found in response and return response."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001058
class UnknownHandler(BaseHandler):
    """Reject URLs whose scheme no other handler opened."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
1063
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An element with no
    '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes, so an empty value ('key=') is
        # accepted instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1073
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Split s on commas into a list of whitespace-stripped elements.
    Commas inside a double-quoted string do not split.  Inside quotes a
    backslash escapes the next character: the backslash is dropped and
    the character is kept literally.  Single quotes have no special
    meaning.  An empty element between two commas is kept; an empty
    trailing element is dropped.
    """
    elements = []
    current = []        # characters of the element being collected
    escaped = False
    in_quotes = False

    for ch in s:
        if escaped:
            # Character following a backslash: take it verbatim.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001116
class FileHandler(BaseHandler):
    """Open file: URLs."""

    # Addresses of the local host; filled in on first use and shared
    # through the class attribute.
    names = None

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Dispatch req to FTP or to the local file system.

        A selector of the form '//x...' (two slashes, not three) is
        retried as an ftp request; anything else is opened locally.
        """
        selector = req.get_selector()
        is_remote = selector[:2] == '//' and selector[2:3] != '/'
        if not is_remote:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    def get_names(self):
        """Return the addresses of 'localhost' and of this host's name."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        Raises URLError('file not on local host') when the URL has a
        host that carries a port or does not resolve to this machine.
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        info = os.stat(localfile)
        modified = email.Utils.formatdate(info.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', info.st_size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # port is only examined when host is non-empty, i.e. after the
        # splitport() call above has bound it.
        if host and (port or
                     socket.gethostbyname(host) not in self.get_names()):
            raise URLError('file not on local host')
        return addinfourl(open(localfile, 'rb'),
                          headers, 'file:'+selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001155
1156class FTPHandler(BaseHandler):
1157 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001158 host = req.get_host()
1159 if not host:
1160 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001161 host, port = splitport(host)
1162 if port is None:
1163 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001164 else:
1165 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001166
1167 # username/password handling
1168 user, host = splituser(host)
1169 if user:
1170 user, passwd = splitpasswd(user)
1171 else:
1172 passwd = None
1173 host = unquote(host)
1174 user = unquote(user or '')
1175 passwd = unquote(passwd or '')
1176
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001177 try:
1178 host = socket.gethostbyname(host)
1179 except socket.error, msg:
1180 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001181 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001182 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001183 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001184 dirs, file = dirs[:-1], dirs[-1]
1185 if dirs and not dirs[0]:
1186 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001187 try:
1188 fw = self.connect_ftp(user, passwd, host, port, dirs)
1189 type = file and 'I' or 'D'
1190 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001191 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001192 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001193 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001194 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001195 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001196 headers = ""
1197 mtype = mimetypes.guess_type(req.get_full_url())[0]
1198 if mtype:
Brett Cannon783eaf42003-06-17 21:52:34 +00001199 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001200 if retrlen is not None and retrlen >= 0:
Brett Cannon783eaf42003-06-17 21:52:34 +00001201 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001202 sf = StringIO(headers)
1203 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001204 return addinfourl(fp, headers, req.get_full_url())
1205 except ftplib.all_errors, msg:
1206 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001207
1208 def connect_ftp(self, user, passwd, host, port, dirs):
1209 fw = ftpwrapper(user, passwd, host, port, dirs)
1210## fw.ftp.set_debuglevel(1)
1211 return fw
1212
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper objects for later requests."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}       # connection key -> ftpwrapper
        self.timeout = {}     # connection key -> expiry time
        self.soonest = 0      # smallest expiry time seen at last check
        self.delay = 60       # amount added to time.time() on each use
        self.max_conns = 16   # cache size at which one entry is dropped

    def setTimeout(self, t):
        """Set the delay added to the current time when an entry is used."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size that triggers dropping an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for these parameters.

        A new wrapper is created when none is cached.  Either way the
        entry's expiry time is refreshed and the cache is checked.
        """
        key = user, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired entries, then drop one entry if the cache is full."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            expired = [key for key, expiry in self.timeout.items()
                       if expiry < now]
            for key in expired:
                self.cache[key].close()
                del self.cache[key]
                del self.timeout[key]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            for key, expiry in self.timeout.items():
                if expiry == self.soonest:
                    del self.cache[key]
                    del self.timeout[key]
                    break
            self.soonest = min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001258
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) from req to its host.

        Raises GopherError when the request has no host.
        """
        import gopherlib # this raises DeprecationWarning in 2.5
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type prefix is split off and not used further.
        gophertype, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001276
1277#bleck! don't use this yet
1278class OpenerFactory:
1279
1280 default_handlers = [UnknownHandler, HTTPHandler,
Tim Peterse1190062001-01-15 03:34:38 +00001281 HTTPDefaultErrorHandler, HTTPRedirectHandler,
Fred Drake13a2c272000-02-10 17:17:14 +00001282 FTPHandler, FileHandler]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001283 handlers = []
1284 replacement_handlers = []
1285
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001286 def add_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001287 self.handlers = self.handlers + [h]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001288
1289 def replace_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001290 pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001291
1292 def build_opener(self):
Jeremy Hylton54e99e82001-08-07 21:12:25 +00001293 opener = OpenerDirector()
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +00001294 for ph in self.default_handlers:
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001295 if inspect.isclass(ph):
Fred Drake13a2c272000-02-10 17:17:14 +00001296 ph = ph()
1297 opener.add_handler(ph)