blob: 21f916ce568a25cfe65018d07e5ece4d35d19b7c [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as original
18urllib. pass the url and optionally data to post to an HTTP URL, and
Tim Peterse1190062001-01-15 03:34:38 +000019get a file-like object back. One difference is that you can also pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000020a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate. if one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
35Request -- an object that encapsulates the state of a request. the
state can be as simple as the URL. it can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform
Fred Draked5214b02001-11-08 17:19:29 +000075# authentication for some reason but fails, how should the error be
76# signalled? The client needs to know the HTTP error code. But if
77# the handler knows that the problem was, e.g., that it didn't know
# the hash algo that was requested in the challenge, it would be good to
79# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000090import base64
91import ftplib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000092import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000093import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000094import md5
95import mimetypes
96import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000097import os
98import posixpath
99import random
100import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000101import sha
102import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000103import sys
104import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000105import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000106import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000107import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000108
109try:
110 from cStringIO import StringIO
111except ImportError:
112 from StringIO import StringIO
113
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000114# not sure how many of these need to be gotten rid of
Georg Brandlddb84d72006-03-18 11:35:18 +0000115from urllib import (unwrap, unquote, splittype, splithost, quote,
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000116 addinfourl, splitport, splitgophertype, splitquery,
117 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000118
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000119# support for FileHandler, proxies via environment variables
120from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000121
Georg Brandl81472752006-03-31 20:02:40 +0000122__version__ = "2.5"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000123
# Module-wide default opener.  It is created lazily by urlopen() and can
# be replaced through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module-wide default opener.

    url and data are handed unchanged to the opener's open() method and
    its result is returned.  The default opener is created with
    build_opener() the first time it is needed.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
130
def install_opener(opener):
    """Install opener as the module-wide default used by urlopen()."""
    globals()['_opener'] = opener
134
135# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000136# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000137# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000138
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    This is a subtype of IOError but shares none of its implementation:
    both __init__ and __str__ are replaced.  self.args is filled in for
    compatibility with other EnvironmentError subclasses, but it holds
    only the reason instead of the usual errno in slot 0 and strerror in
    slot 1.
    """

    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000151
class HTTPError(URLError, addinfourl):
    """An HTTP error that can also be used like a normal response.

    The instance keeps the status code, message, headers and file object
    of the response, and records the URL as ``filename``.
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.filename = url
        self.code, self.msg = code, msg
        self.hdrs, self.fp = hdrs, fp
        # addinfourl depends on fp being a valid file object.  When the
        # error carries no file object the simplest workaround is to
        # leave the base classes uninitialized.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000171
class GopherError(URLError):
    """Protocol-specific URLError subclass for gopher."""
174
Moshe Zadka8a18e992001-03-01 08:40:42 +0000175
class Request:
    """Encapsulate the state of a single URL request.

    The state can be as simple as the URL; it may also carry data to
    send, extra headers, and the origin host / unverifiable flags used
    for cookie handling.

    Header names are stored in ``str.capitalize()`` form by add_header()
    and add_unredirected_header().  has_header() and get_header() look
    the name up as given, so callers must pass the capitalized form.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        """Create a request for url.

        headers is an optional mapping of header name to value; None
        (the default) means no extra headers.  When origin_req_host is
        None it is derived with cookielib.request_host().
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # None replaces the former shared mutable {} default.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                # Only hand the value back if the getter really created
                # it; looking it up through getattr() again would recurse
                # without bound when it did not (e.g. __r_type after
                # set_proxy() has already filled in self.type).
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" when the request carries data, else "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Set the data to send with the request."""
        self.data = data

    def has_data(self):
        """Return True when data has been set (is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the data to send with the request, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, splitting it off on first use.

        Raises ValueError when the URL has no scheme.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what remains of the URL after the host was split off.

        After set_proxy() this is the full original URL.
        """
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request through a proxy.

        host and type replace the request's own host and scheme, and the
        selector becomes the full original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        """Return the origin request host."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the unverifiable flag."""
        return self.unverifiable

    def add_header(self, key, val):
        """Add a header; the name is stored capitalized."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if header_name is a regular or unredirected header."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value, preferring regular over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000276
class OpenerDirector:
    """Manage a collection of handler objects and use them to open URLs.

    Handlers are registered with add_handler(), which inspects their
    attribute names:

      <protocol>_open       -- tried by _open() for that protocol
      <protocol>_error_<k>  -- tried by error(); k is stored as an int
                               when it parses as one
      <protocol>_request    -- pre-processes requests in open()
      <protocol>_response   -- post-processes responses in open()
    """

    # Handler attributes that look like protocol hooks but are not.
    _not_hooks = ("redirect_request", "do_open", "proxy_open")

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every hook name it provides.

        A handler that provides no hook at all is ignored; otherwise it
        is inserted into self.handlers and handler.add_parent(self) is
        called.
        """
        added = False
        for meth in dir(handler):
            if meth in self._not_hooks:
                # coincidental match with the <protocol>_<condition>
                # pattern; these are helper methods, not hooks
                continue

            i = meth.find("_")
            if i < 0:
                # no underscore, so there is no protocol/condition split
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind].

        Returns the first result that is not None, or None when every
        handler declined or none is registered for kind.
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the response.

        The request goes through the <protocol>_request processors, is
        opened with _open(), and the response then goes through the
        <protocol>_response processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try default_open, then <protocol>_open, then unknown_open."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for proto.

        For 'http' and 'https' the third positional argument is used as
        the error kind, the handlers' http_error_<kind> methods are
        tried, and http_error_default is the fallback.  For any other
        protocol the <proto>_error methods are tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000405
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000406# XXX probably also want an abstract factory that knows when it makes
407# sense to skip a superclass in favor of a subclass and when it might
408# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000409
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # A set, because several supplied handlers may override the same
    # default class; listing that class twice would make the removal
    # below fail on the second attempt.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    default_classes = [klass for klass in default_classes
                       if klass not in skip]

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
445
class BaseHandler:
    """Base class for handlers.

    handler_order determines how handlers compare with ``<``; a lower
    value sorts first.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember parent as self.parent."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        return None

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000463
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000464
class HTTPErrorProcessor(BaseHandler):
    """Turn HTTP responses with an error status into error handling."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return response as is for codes 200 and 206.

        Any other code is passed to self.parent.error() and the result
        of that call is returned instead.
        """
        code = response.code
        msg = response.msg
        hdrs = response.info()

        if code in (200, 206):
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
479
class HTTPDefaultErrorHandler(BaseHandler):
    """Raise an HTTPError for an HTTP error dispatched to this handler."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000483
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        # Strictly (according to RFC 2616), 301 or 302 in response
        # to a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we
        # do the same.
        idempotent = code in (301, 302, 303, 307) and method in ("GET", "HEAD")
        posted = code in (301, 302, 303) and method == "POST"
        if not (idempotent or posted):
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        return Request(newurl,
                       headers=req.headers,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or when
        redirect_request() declines; raises HTTPError when the redirect
        limits are reached.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers.getheaders(header_name)[0]
                break
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if not hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict = {}
        else:
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000565
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given by a scheme -> URL mapping.

    For every scheme in the mapping the instance gets a <scheme>_open
    attribute that forwards to proxy_open() with that scheme's proxy.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """proxies maps URL scheme to proxy URL; None means getproxies()."""
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Bind url/type as lambda defaults so each closure keeps the
            # values of its own loop iteration.
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy, adding Basic proxy credentials if present.

        Returns None so other handlers open the request when the proxy
        uses the request's own scheme; otherwise restarts the request
        through self.parent.open().
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        if not type or r_type.isdigit():
            # proxy is specified without protocol
            type = orig_type
            host = proxy
        else:
            host, r_host = splithost(r_type)
        user_pass, host = splituser(host)
        # Only parse credentials when the proxy spec actually has a
        # "user:pass@" part; splitpasswd() must not be handed None.
        if user_pass:
            user, password = splitpasswd(user_pass)
            if user and password:
                user_pass = base64.encodestring('%s:%s' % (unquote(user),
                                                unquote(password))).strip()
                req.add_header('Proxy-authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000607
608# feature suggested by Duncan Booth
609# XXX custom is not a good name
class CustomProxy:
    """Pair a proxy address with a predicate deciding when to use it.

    Either pass a function to the constructor or override handle().
    """

    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        predicate = self.func
        if not predicate:
            return None
        if predicate(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000623
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from a set of CustomProxy-like objects.

    self.proxies maps a protocol name to the list of proxy objects
    registered for it, in registration order.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        """Register every proxy object passed to the constructor."""
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Reopen req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        protocol or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # set_proxy() needs the proxy type as well; the proxy
                # object only supplies an address, so keep the request's
                # own protocol.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Reopen req through self.parent; p is not used."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its cpo.proto protocol."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000651
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # Maps realm -> {tuple of reduced URIs: (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for a URI, or a sequence of URIs, in realm."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if realm not in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) registered for authuri in realm.

        Returns (None, None) when nothing registered under realm covers
        authuri.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.iteritems():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # A bare netloc is parsed as a path; treat it as the host root
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare on a path-segment boundary: a plain character prefix
        # would let base '/foo' match the unrelated path '/foobar'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000695
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000696
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to credentials stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up authuri in realm first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm; try the default realm
            return lookup(self, None, authuri)
        return user, password
705
706
class AbstractBasicAuthHandler:
    """Mixin implementing the retry step of HTTP basic authentication.

    Users of this mixin supply auth_header (the request header that
    carries the credentials) and self.parent, whose open() method is
    used to resend the request.
    """

    # Captures the auth-scheme and the quoted realm of a challenge
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use password_mgr for lookups, or a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if the challenge asks for Basic auth.

        authreq names the challenge header to read from headers.
        Returns the retried response, or None when the header is absent,
        cannot be parsed, or names a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Resend req with a Basic credentials header for realm.

        Returns None when no password is known for the realm/URL or when
        the request already carries exactly these credentials.
        """
        # TODO(jhylton): Remove the host argument? It depends on whether
        # retry_http_basic_auth() is consider part of the public API.
        # It probably is.
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            # b64encode() emits a single line; encodestring() wraps its
            # output every 76 characters, which would put newlines inside
            # the header value for long credentials.
            auth = 'Basic %s' % base64.b64encode(raw)
            if req.headers.get(self.auth_header, None) == auth:
                # Same credentials were already sent and rejected
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
745
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using basic authentication."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req with credentials taken from the www-authenticate challenge."""
        # Element 1 of the parsed URL is the network location
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
754
755
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using basic authentication."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req with credentials taken from the proxy-authenticate challenge."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
764
765
def randombytes(n):
    """Return n random bytes.

    Reads from /dev/urandom when that path exists and otherwise falls
    back to the random module.
    """
    # Use /dev/urandom if it is available.  Fall back to random module
    # if not.  It might be worthwhile to extend this function to use
    # other platform-specific mechanisms for getting random bytes.
    if os.path.exists("/dev/urandom"):
        # Binary mode: the device yields arbitrary bytes, not text.
        f = open("/dev/urandom", "rb")
        try:
            return f.read(n)
        finally:
            # Release the descriptor even if the read fails
            f.close()
    else:
        L = [chr(random.randrange(0, 256)) for i in range(n)]
        return "".join(L)
779
class AbstractDigestAuthHandler:
    """Mixin implementing the retry step of HTTP digest authentication.

    Users of this mixin supply auth_header (the request header that
    carries the credentials) and self.parent, whose open() method is
    used to resend the request.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use passwd for credential lookups, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of challenges handled since the last reset
        self.retried = 0
        # Counter sent as the nc value when qop is 'auth'
        self.nonce_count = 0

    def reset_retry_count(self):
        """Reset the counter that bounds repeated authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req if the challenge in headers[auth_header] asks for Digest.

        Raises HTTPError once more than five attempts have been made
        without a reset, and ValueError when the challenge names a
        scheme other than Digest.  Returns None when the header is
        absent.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Resend req with a Digest credentials header answering auth.

        Returns None when no credentials header can be built or when the
        request already carries exactly this header value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hex client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the credentials string answering the parsed challenge chal.

        Returns None when the challenge lacks realm or nonce, names an
        unknown algorithm or an unsupported qop, or when no user is
        registered for the realm and URL.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            # No response digest can be computed for any other qop, so
            # decline rather than continue with respdig undefined.
            return None

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        H hashes a string to a hex digest and KD(s, d) is H("s:d").
        Returns (None, None) for an algorithm other than 'MD5' or 'SHA'.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # Unknown algorithm: get_authorization() tests "H is None"
            return None, None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000913
Moshe Zadka8a18e992001-03-01 08:40:42 +0000914
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req using the Digest challenge in www-authenticate."""
        # Element 1 of the parsed URL is the network location
        netloc = urlparse.urlparse(req.get_full_url())[1]
        outcome = self.http_error_auth_reqed('www-authenticate',
                                             netloc, req, headers)
        # Reached only when the attempt above did not raise
        self.reset_retry_count()
        return outcome
Moshe Zadka8a18e992001-03-01 08:40:42 +0000930
931
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using digest authentication."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req using the Digest challenge in proxy-authenticate."""
        proxy_host = req.get_host()
        outcome = self.http_error_auth_reqed('proxy-authenticate',
                                             proxy_host, req, headers)
        # Reached only when the attempt above did not raise
        self.reset_retry_count()
        return outcome
Tim Peterse1190062001-01-15 03:34:38 +0000942
Moshe Zadka8a18e992001-03-01 08:40:42 +0000943class AbstractHTTPHandler(BaseHandler):
944
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000945 def __init__(self, debuglevel=0):
946 self._debuglevel = debuglevel
947
948 def set_http_debuglevel(self, level):
949 self._debuglevel = level
950
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000951 def do_request_(self, request):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000952 host = request.get_host()
953 if not host:
954 raise URLError('no host given')
955
956 if request.has_data(): # POST
957 data = request.get_data()
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000958 if not request.has_header('Content-type'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000959 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000960 'Content-type',
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000961 'application/x-www-form-urlencoded')
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000962 if not request.has_header('Content-length'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000963 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000964 'Content-length', '%d' % len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000965
966 scheme, sel = splittype(request.get_selector())
967 sel_host, sel_path = splithost(sel)
968 if not request.has_header('Host'):
969 request.add_unredirected_header('Host', sel_host or host)
970 for name, value in self.parent.addheaders:
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000971 name = name.capitalize()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000972 if not request.has_header(name):
973 request.add_unredirected_header(name, value)
974
975 return request
976
Moshe Zadka8a18e992001-03-01 08:40:42 +0000977 def do_open(self, http_class, req):
Jeremy Hylton023518a2003-12-17 18:52:16 +0000978 """Return an addinfourl object for the request, using http_class.
979
980 http_class must implement the HTTPConnection API from httplib.
981 The addinfourl return value is a file-like object. It also
982 has methods and attributes including:
983 - info(): return a mimetools.Message object for the headers
984 - geturl(): return the original request URL
985 - code: HTTP status code
986 """
Moshe Zadka76676802001-04-11 07:44:53 +0000987 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000988 if not host:
989 raise URLError('no host given')
990
Jeremy Hylton828023b2003-05-04 23:44:49 +0000991 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000992 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +0000993
Jeremy Hylton023518a2003-12-17 18:52:16 +0000994 headers = dict(req.headers)
995 headers.update(req.unredirected_hdrs)
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000996 # We want to make an HTTP/1.1 request, but the addinfourl
997 # class isn't prepared to deal with a persistent connection.
998 # It will try to read all remaining data from the socket,
999 # which will block while the server waits for the next request.
1000 # So make sure the connection gets closed after the (only)
1001 # request.
1002 headers["Connection"] = "close"
Jeremy Hylton828023b2003-05-04 23:44:49 +00001003 try:
Jeremy Hylton023518a2003-12-17 18:52:16 +00001004 h.request(req.get_method(), req.get_selector(), req.data, headers)
1005 r = h.getresponse()
1006 except socket.error, err: # XXX what error?
Jeremy Hylton828023b2003-05-04 23:44:49 +00001007 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001008
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001009 # Pick apart the HTTPResponse object to get the addinfourl
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001010 # object initialized properly.
1011
1012 # Wrap the HTTPResponse object in socket's file object adapter
1013 # for Windows. That adapter calls recv(), so delegate recv()
1014 # to read(). This weird wrapping allows the returned object to
1015 # have readline() and readlines() methods.
Tim Peters9ca3f852004-08-08 01:05:14 +00001016
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001017 # XXX It might be better to extract the read buffering code
1018 # out of socket._fileobject() and into a base class.
Tim Peters9ca3f852004-08-08 01:05:14 +00001019
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001020 r.recv = r.read
1021 fp = socket._fileobject(r)
Tim Peters9ca3f852004-08-08 01:05:14 +00001022
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001023 resp = addinfourl(fp, r.msg, req.get_full_url())
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001024 resp.code = r.status
1025 resp.msg = r.reason
1026 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001027
Moshe Zadka8a18e992001-03-01 08:40:42 +00001028
class HTTPHandler(AbstractHTTPHandler):
    """Open http URLs with httplib.HTTPConnection."""

    # Requests are prepared by the shared header-filling step
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send req over an HTTPConnection and return the response."""
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001035
# Only define the https handler when httplib provides an HTTPS attribute
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https URLs with httplib.HTTPSConnection."""

        # Requests are prepared by the shared header-filling step
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Send req over an HTTPSConnection and return the response."""
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1043
class HTTPCookieProcessor(BaseHandler):
    """Apply a cookie jar to outgoing requests and incoming responses."""

    def __init__(self, cookiejar=None):
        """Use cookiejar, or a new cookielib.CookieJar when none is given."""
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = cookielib.CookieJar()

    def http_request(self, request):
        """Let the jar add its cookie header to request; return request."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response; return response."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001060
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming their URL type."""

    def unknown_open(self, req):
        """Raise URLError reporting the request's URL type."""
        url_type = req.get_type()
        raise URLError('unknown url type: %s' % url_type)
1065
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An element without
    '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices, unlike v[0] and v[-1], are safe when the value is
        # empty (as in 'key=').
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1075
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    chars = []          # characters of the element being collected
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # Character following a backslash inside quotes: keep as is
            chars.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                # The backslash itself is dropped from the element
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                chars.append(ch)
        elif ch == ',':
            # Unquoted comma ends the current element
            items.append(''.join(chars))
            chars = []
        else:
            if ch == '"':
                in_quotes = True
            chars.append(ch)

    # append last part
    if chars:
        items.append(''.join(chars))

    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001118
class FileHandler(BaseHandler):
    """Open file: URLs from the local filesystem, or hand them to FTP."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Dispatch req to FTP if its selector names a host, else open locally."""
        selector = req.get_selector()
        # '//host/...' (but not '///...') carries a host part
        names_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses treated as local, resolving them once.

        The result is cached on the FileHandler class.
        """
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            FileHandler.names = (socket.gethostbyname('localhost'),
                                 socket.gethostbyname(socket.gethostname()))
        except socket.gaierror:
            # Resolution failed; settle for 'localhost' alone
            FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        Raises URLError when the request names a host that has a port or
        does not resolve to one of the local addresses.
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        info = os.stat(localfile)
        length = info.st_size
        stamp = email.Utils.formatdate(info.st_mtime, usegmt=True)
        guessed = mimetypes.guess_type(selector)[0]
        header_text = ('Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                       (guessed or 'text/plain', length, stamp))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
            is_local = (not host or
                        (not port and
                         socket.gethostbyname(host) in self.get_names()))
        else:
            # No host at all means the local machine
            is_local = True
        if is_local:
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+selector)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001160
1161class FTPHandler(BaseHandler):
1162 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001163 host = req.get_host()
1164 if not host:
1165 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001166 host, port = splitport(host)
1167 if port is None:
1168 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001169 else:
1170 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001171
1172 # username/password handling
1173 user, host = splituser(host)
1174 if user:
1175 user, passwd = splitpasswd(user)
1176 else:
1177 passwd = None
1178 host = unquote(host)
1179 user = unquote(user or '')
1180 passwd = unquote(passwd or '')
1181
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001182 try:
1183 host = socket.gethostbyname(host)
1184 except socket.error, msg:
1185 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001186 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001187 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001188 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001189 dirs, file = dirs[:-1], dirs[-1]
1190 if dirs and not dirs[0]:
1191 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001192 try:
1193 fw = self.connect_ftp(user, passwd, host, port, dirs)
1194 type = file and 'I' or 'D'
1195 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001196 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001197 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001198 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001199 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001200 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001201 headers = ""
1202 mtype = mimetypes.guess_type(req.get_full_url())[0]
1203 if mtype:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001204 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001205 if retrlen is not None and retrlen >= 0:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001206 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001207 sf = StringIO(headers)
1208 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001209 return addinfourl(fp, headers, req.get_full_url())
1210 except ftplib.all_errors, msg:
1211 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001212
1213 def connect_ftp(self, user, passwd, host, port, dirs):
1214 fw = ftpwrapper(user, passwd, host, port, dirs)
1215## fw.ftp.set_debuglevel(1)
1216 return fw
1217
class CacheFTPHandler(FTPHandler):
    """FTPHandler that reuses ftpwrapper connections between requests."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}       # key -> ftpwrapper
        self.timeout = {}     # key -> time after which the entry expires
        self.soonest = 0      # smallest value currently in self.timeout
        self.delay = 60       # seconds added to now when an entry is used
        self.max_conns = 16   # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds a connection is kept after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which one entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for these arguments, creating it if needed."""
        key = user, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        # Every use pushes the entry's expiry time forward
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and evict one entry when the cache is full."""
        # NOTE(review): min() raises ValueError if the timeout table ends
        # up empty here (e.g. a delay <= 0 expiring every entry) -- confirm
        # whether callers can reach that state.
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            expired = [k for k, v in self.timeout.items() if v < now]
            for k in expired:
                self.cache[k].close()
                del self.cache[k]
                del self.timeout[k]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            # NOTE(review): unlike the expiry path, the evicted entry is
            # dropped without close() -- confirm that is intended.
            victims = [k for k, v in self.timeout.items()
                       if v == self.soonest]
            if victims:
                oldest = victims[0]
                del self.cache[oldest]
                del self.timeout[oldest]
            self.soonest = min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001263
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) from req and return an addinfourl.

        Raises GopherError when the request has no host.
        """
        import gopherlib # this raises DeprecationWarning in 2.5
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type split off the selector is not used further
        gtype, rest = splitgophertype(req.get_selector())
        rest, query = splitquery(rest)
        selector = unquote(rest)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001281
1282#bleck! don't use this yet
1283class OpenerFactory:
1284
1285 default_handlers = [UnknownHandler, HTTPHandler,
Tim Peterse1190062001-01-15 03:34:38 +00001286 HTTPDefaultErrorHandler, HTTPRedirectHandler,
Fred Drake13a2c272000-02-10 17:17:14 +00001287 FTPHandler, FileHandler]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001288 handlers = []
1289 replacement_handlers = []
1290
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001291 def add_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001292 self.handlers = self.handlers + [h]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001293
1294 def replace_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001295 pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001296
1297 def build_opener(self):
Jeremy Hylton54e99e82001-08-07 21:12:25 +00001298 opener = OpenerDirector()
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +00001299 for ph in self.default_handlers:
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001300 if inspect.isclass(ph):
Fred Drake13a2c272000-02-10 17:17:14 +00001301 ph = ph()
1302 opener.add_handler(ph)