blob: 042f2ba6f2c3415d238345f28aa8ce4f63096fb3 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  It accepts one or more Handlers
as arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# what the problem was, e.g., that it didn't recognize the hash
# algorithm requested in the challenge, it would be good to pass that
# information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
91import UserDict
92import httplib
93import re
94import base64
95import types
96import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000097import md5
98import mimetypes
99import mimetools
100import ftplib
101import sys
102import time
103import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000104import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105
106try:
107 from cStringIO import StringIO
108except ImportError:
109 from StringIO import StringIO
110
111try:
112 import sha
113except ImportError:
114 # need 1.5.2 final
115 sha = None
116
117# not sure how many of these need to be gotten rid of
118from urllib import unwrap, unquote, splittype, splithost, \
119 addinfourl, splitport, splitgophertype, splitquery, \
120 splitattr, ftpwrapper, noheaders
121
122# support for proxies via environment variables
123from urllib import getproxies
124
125# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000126from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000127
128__version__ = "2.0a1"
129
# Module-wide default opener.  It is created lazily by urlopen() or
# supplied explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the default opener and return what it returns.

    The default opener is built with build_opener() the first time it
    is needed.  url and data are handed to the opener's open() method
    unchanged.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)

def install_opener(opener):
    """Make opener the default opener used by urlopen()."""
    global _opener
    _opener = opener
140
141# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000142# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000143 # subtypes.
144
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of the
    implementation: __init__ and __str__ are overridden and IOError's
    own initialiser is deliberately not called.

    reason -- whatever object describes the failure (a string, an
              exception instance, a tuple, ...).
    """

    def __init__(self, reason):
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple: with a bare "%" operand a tuple-valued
        # reason would be unpacked as the format arguments and either
        # raise TypeError or print only part of the reason.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000153
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    # The instance is initialised through addinfourl (imported from
    # urllib), not through URLError.__init__, so no 'reason' attribute
    # is set by this class.
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # url  -- URL of the request that failed
        # code -- HTTP status code
        # msg  -- message accompanying the status code
        # hdrs -- response headers
        # fp   -- file-like object for the response body
        self.__super_init(fp, hdrs, url)
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        # The response file object is closed when the exception object is
        # destroyed.
        if self.fp:
            self.fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000175
class GopherError(URLError):
    """URLError subclass for gopher errors; adds no behaviour of its own."""
178
Moshe Zadka8a18e992001-03-01 08:40:42 +0000179
class Request:
    """Encapsulates the state of one request: URL, data and headers.

    The URL is split lazily: get_type() splits off the URL type and
    get_host() splits the host from the selector.  The leftovers of each
    split are kept in the private attributes __r_type and __r_host.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        # Copy the caller's mapping so that add_header() never modifies it
        # (nor the shared default argument).
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        # '_Request__r_' is the name-mangled prefix of self.__r_XXX; a
        # missing __r_XXX attribute is produced by calling get_XXX() and
        # then looking the attribute up again.
        # NOTE(review): if get_XXX() does not set the attribute (e.g.
        # get_type() when self.type was already assigned), the second
        # lookup re-enters __getattr__ and recurses -- confirm callers
        # never hit this.
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError, attr

    def add_data(self, data):
        """Replace the request data."""
        self.data = data

    def has_data(self):
        """Return true if data has been set (i.e. is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the request data (None if there is none)."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL type, splitting it off the URL on first use."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            # a URL for which splittype finds no type is rejected here
            assert self.type is not None, self.__original
        return self.type

    def get_host(self):
        """Return the unquoted host, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what is left of the URL after the host was split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request to host/type; the full URL becomes the selector."""
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        """Set header key to val, replacing any previous value."""
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000240
class OpenerDirector:
    """Dispatches requests and errors to the registered handlers.

    Handlers are indexed by the names of their methods (see
    add_handler).  open() and error() walk the matching handlers in
    registration order and stop at the first one that returns a value
    other than None.
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        # (name, value) header pairs; not used inside this class --
        # presumably read by the protocol handlers, verify there
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        # protocol -> list of handlers having a <protocol>_open method
        self.handle_open = {}
        # protocol -> {kind -> list of handlers} for
        # <protocol>_error_<kind> methods
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every <proto>_open / <proto>_error_<kind>
        method it provides; handlers with no such method are ignored."""
        added = 0
        for meth in get_methods(handler):
            if meth[-5:] == '_open':
                protocol = meth[:-5]
                if self.handle_open.has_key(protocol):
                    self.handle_open[protocol].append(handler)
                else:
                    self.handle_open[protocol] = [handler]
                added = 1
                continue
            # split "<proto>_error_<kind>": i is the first underscore, j
            # the second one
            i = meth.find('_')
            j = meth[i+1:].find('_') + i + 1
            # NOTE(review): j is never -1 here (a failed find yields j == i);
            # such names are still rejected because meth[i+1:j] is then ''
            if j != -1 and meth[i+1:j] == 'error':
                proto = meth[:i]
                kind = meth[j+1:]
                try:
                    # numeric kinds (e.g. HTTP status codes) are stored as ints
                    kind = int(kind)
                except ValueError:
                    pass
                dict = self.handle_error.get(proto, {})
                if dict.has_key(kind):
                    dict[kind].append(handler)
                else:
                    dict[kind] = [handler]
                self.handle_error[proto] = dict
                added = 1
                continue
        if added:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        # chain maps kind -> handlers; call meth_name(*args) on each in
        # turn and return the first result that is not None (implicitly
        # None when no handler produced one).
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a string or a Request), optionally with data.

        Tries the 'default_open' handlers first, then the handlers for
        the URL's type, then the 'unknown_open' handlers.
        """
        # accept a URL or a Request object
        if type(fullurl) == types.StringType:
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        type_ = req.get_type()
        result = self._call_chain(self.handle_open, type_, type_ + \
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error for proto to the registered error handlers.

        For http/https the handlers are selected by args[2], which is
        formatted with %d into the method name http_error_<code>; if none
        of them returns a result, the 'http_error_default' handlers are
        tried.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special cased
            dict = self.handle_error['http'] # https is not different then http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%d' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000345
def is_callable(obj):
    """Return a true value if obj is one of the callable kinds known here.

    Not quite like the builtin callable(): only builtin functions and
    methods, functions/lambdas, methods, and instances that have a
    __call__ attribute qualify.
    """
    kind = type(obj)
    for callable_type in (types.BuiltinFunctionType,
                          types.BuiltinMethodType,
                          types.LambdaType,
                          types.MethodType):
        if kind == callable_type:
            return 1
    if kind == types.InstanceType:
        return hasattr(obj, '__call__')
    return 0
356
def get_methods(inst):
    """Return the names of the methods and callable attributes of inst.

    The class of inst and all of its base classes are searched
    (breadth first) for unbound methods; callable attributes found on
    the instance itself are added as well.
    """
    found = {}
    pending = [inst.__class__]
    while pending:
        klass = pending.pop(0)
        pending.extend(list(klass.__bases__))
        for name in dir(klass):
            if type(getattr(klass, name)) == types.UnboundMethodType:
                found[name] = 1
    for name in dir(inst):
        if is_callable(getattr(inst, name)):
            found[name] = 1
    return found.keys()
373
374# XXX probably also want an abstract factory that knows things like
375 # the fact that a ProxyHandler needs to get inserted first.
376# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000377 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000378
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    handlers may be handler instances or handler classes; classes are
    instantiated without arguments.  If any of the handlers passed as
    arguments are subclasses (or instances of subclasses) of a default
    handler, that default handler will not be used.

    Returns the populated OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Collect the defaults overridden by the caller.  Each class is
    # recorded only once: two arguments deriving from the same default
    # would otherwise make the second remove() below raise ValueError.
    skip = []
    for klass in default_classes:
        for check in handlers:
            if type(check) == types.ClassType:
                overrides = issubclass(check, klass)
            elif type(check) == types.InstanceType:
                overrides = isinstance(check, klass)
            else:
                overrides = 0
            if overrides and klass not in skip:
                skip.append(klass)
    for klass in skip:
        default_classes.remove(klass)

    # defaults first, then the caller's handlers in the order given
    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if type(h) == types.ClassType:
            h = h()
        opener.add_handler(h)
    return opener
416
class BaseHandler:
    """Common base of the handlers: keeps a reference to the parent."""

    def add_parent(self, parent):
        """Remember parent (the object this handler was added to)."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
422
class HTTPDefaultErrorHandler(BaseHandler):
    """Turns an HTTP error response into an HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError for the failed request; never returns."""
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000426
class HTTPRedirectHandler(BaseHandler):
    """Follows HTTP 301 and 302 redirects."""

    # Message prefix used when a redirect loop is detected.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Re-open the URL named by the 'location' (or 'uri') header.

        Returns None when the response names no target, so that other
        handlers may deal with it.  Raises HTTPError when the target was
        already visited or more than ten redirects were followed.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return

        # the target may be relative to the URL that was requested
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())

        # loop detection
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict) > 10 or \
               req.error_302_dict.has_key(newurl):
                # HTTPError requires the response file object as well
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't drain and close fp until we are sure that it will not
        # be handed to HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
463
class ProxyHandler(BaseHandler):
    """Routes requests through the proxies given as a type -> URL mapping."""

    def __init__(self, proxies=None):
        """Install one <type>_open method per entry of proxies.

        proxies defaults to the mapping returned by getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # default arguments freeze the per-proxy values in the lambda
            opener = lambda r, proxy=url, type=type, meth=self.proxy_open: \
                     meth(r, proxy, type)
            setattr(self, '%s_open' % type, opener)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy; re-open it if the proxy's type differs."""
        orig_type = req.get_type()
        # the type actually used is the one of the proxy URL itself
        type, r_type = splittype(proxy)
        host = splithost(r_type)[0]
        if '@' in host:
            # user:password@host -- send the credentials to the proxy
            user_pass, host = host.split('@', 1)
            user_pass = base64.encodestring(unquote(user_pass)).strip()
            req.add_header('Proxy-Authorization', 'Basic '+user_pass)
        req.set_proxy(unquote(host), type)
        if orig_type != type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000494
495# feature suggested by Duncan Booth
496# XXX custom is not a good name
class CustomProxy:
    """Describes one proxy together with the test deciding when to use it."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        accepts = self.func
        if not accepts:
            return None
        if accepts(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000510
class CustomProxyHandler(BaseHandler):
    """Chooses a proxy per request from a set of CustomProxy objects."""

    def __init__(self, *proxies):
        """Register every CustomProxy passed in (they used to be ignored)."""
        # protocol -> list of CustomProxy objects, in registration order
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Send req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's type
        or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                return self.do_proxy(p, req)
        return None

    def do_proxy(self, p, req):
        """Point req at proxy p and re-open it through the parent."""
        # Request.set_proxy() needs the proxy type as well as the host.
        # NOTE(review): assumes the proxy is contacted with the request's
        # own URL type -- confirm.
        req.set_proxy(p.get_proxy(), req.get_type())
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its protocol."""
        if self.proxies.has_key(cpo.proto):
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000536
class HTTPPasswordMgr:
    """Stores (user, password) pairs by realm and URI."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Record user/passwd for realm and uri (one URI or a sequence)."""
        # uri could be a single URI or a sequence
        if type(uri) == types.StringType:
            uri = [uri]
        reduced = tuple(map(self.reduce_uri, uri))
        if not self.passwd.has_key(realm):
            self.passwd[realm] = {}
        realm_entries = self.passwd[realm]
        realm_entries[reduced] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) stored for realm that covers authuri.

        Returns (None, None) when nothing matches.
        """
        realm_entries = self.passwd.get(realm, {})
        wanted = self.reduce_uri(authuri)
        for uris, authinfo in realm_entries.items():
            for uri in uris:
                if self.is_suburi(uri, wanted):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        netloc, path = parts[1], parts[2]
        if netloc:
            return netloc, path or '/'
        # a bare netloc is parsed as a path; treat it as the host
        return path, '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] == test[0]:
            shared = posixpath.commonprefix((base[1], test[1]))
            if len(shared) == len(base[1]):
                return 1
        return 0
Tim Peterse1190062001-01-15 03:34:38 +0000580
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000581
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the entries stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; if no user is found, retry with realm None."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
589
590
class AbstractBasicAuthHandler:
    """Shared implementation of HTTP basic authentication.

    Concrete handlers supply the 'header' attribute (name of the request
    header carrying the credentials) and a 'parent' opener.
    """

    # matches: <scheme> realm="<realm>"
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """password_mgr defaults to a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.__current_realm = None
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  must be careful to set it to None on every return.

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if the challenge asks for basic auth.

        authreq -- name of the response header holding the challenge
        Returns the new response, or None if the challenge is absent,
        is not parseable, is not 'basic', or no password is known.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with the credentials stored for realm and host."""
        if self.__current_realm is not None:
            # We are being called again while our own retry is still in
            # progress: the credentials just sent were refused.
            self.__current_realm = realm
            return None
        self.__current_realm = realm
        try:
            user, pw = self.passwd.find_user_password(realm, host)
            if not pw:
                return None
            raw = "%s:%s" % (user, pw)
            auth = base64.encodestring(raw).strip()
            req.add_header(self.header, 'Basic %s' % auth)
            return self.parent.open(req)
        finally:
            # Always clear the marker, also when the retry raises
            # (e.g. HTTPError after a refusal); otherwise the handler
            # would answer None to every later challenge.
            self.__current_realm = None
637
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 401 responses with basic authentication credentials."""

    # request header that carries the credentials
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req, looking up credentials by the URL's network location."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
646
647
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 407 responses with basic authentication credentials."""

    # request header that carries the credentials
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req, looking up credentials by the request's host."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
656
657
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP digest authentication.

    Concrete handlers supply the 'header' attribute (name of the request
    header carrying the credentials) and a 'parent' opener.
    """

    def __init__(self, passwd=None):
        """passwd is the password manager; defaults to an HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # not None while one of our own retries is in progress
        self.__current_realm = None

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if the challenge asks for digest auth.

        authreq -- name of the response header holding the challenge
        Returns the new response, or None if there is no digest
        challenge or no authorization could be computed.
        """
        # The challenge arrives in the header named by authreq;
        # self.header names the header of our *reply*.
        authreq = headers.get(authreq, None)
        if authreq:
            kind = authreq.split()[0]
            # scheme names are compared case-insensitively, as the basic
            # authentication handler does
            if kind.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        """Re-open req with an authorization computed from challenge auth."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        # true when we are called again during our own retry
        nested = self.__current_realm is not None
        try:
            auth = self.get_authorization(req, chal)
            if auth:
                req.add_header(self.header, 'Digest %s' % auth)
                return self.parent.open(req)
            return None
        finally:
            # The outermost call always clears the marker, also when no
            # authorization could be built or the retry raised; otherwise
            # every later challenge would be answered with None.
            if not nested:
                self.__current_realm = None

    def get_authorization(self, req, chal):
        """Return the value for the digest reply header, or None.

        chal is the parsed challenge (a mapping).  None is returned when
        the challenge lacks realm or nonce, when a retry is already in
        progress, when the algorithm is unsupported, or when no user is
        known for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        if self.__current_realm is None:
            self.__current_realm = realm
        else:
            # our previous attempt was refused; do not try again
            self.__current_realm = realm
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm,
                                                  req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        (None, None) is returned for an algorithm that is not supported
        (or whose digest module is not available).
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x, e=encode_digest:e(md5.new(x).digest())
        elif algorithm == 'SHA' and sha is not None:
            H = lambda x, e=encode_digest:e(sha.new(x).digest())
        # XXX MD5-sess
        if H is None:
            return None, None
        KD = lambda s, d, H=H: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000747
Moshe Zadka8a18e992001-03-01 08:40:42 +0000748
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Name of the request header that carries the credentials.
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 reply by delegating to http_error_auth_reqed().

        The host is taken from the network-location part of the
        request's full URL.  Whatever http_error_auth_reqed() returns
        is passed back to the caller; previously the value was dropped,
        so a successfully retried response could never be delivered.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
761
762
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (407 replies)."""

    # Name of the request header that carries the credentials.
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 reply by delegating to http_error_auth_reqed().

        The host is whatever req.get_host() reports.  The result of
        http_error_auth_reqed() is returned to the caller; previously
        it was dropped, so a retried response could never be delivered.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
770
771
def encode_digest(digest):
    """Return the lowercase hexadecimal spelling of a digest string.

    Every character of digest contributes two hex digits: first the
    high nibble of its ordinal, then the low nibble.
    """
    digits = '0123456789abcdef'
    pairs = [digits[(ord(ch) >> 4) & 0xf] + digits[ord(ch) & 0xf]
             for ch in digest]
    return ''.join(pairs)
Tim Peterse1190062001-01-15 03:34:38 +0000780
781
Moshe Zadka8a18e992001-03-01 08:40:42 +0000782class AbstractHTTPHandler(BaseHandler):
783
784 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000785 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000786 if not host:
787 raise URLError('no host given')
788
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000789 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000790 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000791 if req.has_data():
792 data = req.get_data()
793 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000794 if not req.headers.has_key('Content-type'):
795 h.putheader('Content-type',
796 'application/x-www-form-urlencoded')
797 if not req.headers.has_key('Content-length'):
798 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000799 else:
800 h.putrequest('GET', req.get_selector())
801 except socket.error, err:
802 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000803
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000804 h.putheader('Host', host)
805 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000806 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000807 for k, v in req.headers.items():
808 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000809 h.endheaders()
810 if req.has_data():
811 h.send(data + '\r\n')
812
813 code, msg, hdrs = h.getreply()
814 fp = h.getfile()
815 if code == 200:
816 return addinfourl(fp, hdrs, req.get_full_url())
817 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000818 return self.parent.error('http', req, fp, code, msg, hdrs)
819
Moshe Zadka8a18e992001-03-01 08:40:42 +0000820
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs using httplib.HTTP."""

    def http_open(self, req):
        """Return the result of do_open() with the plain HTTP class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
825
826
# HTTPSHandler is only defined when httplib offers an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs using httplib.HTTPS."""

        def https_open(self, req):
            """Return the result of do_open() with the HTTPS class."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
832
833
class UnknownHandler(BaseHandler):
    """Handler that rejects a request by raising URLError."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
838
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value that both starts
    and ends with a double quote has those quotes removed.  Returns a
    dictionary mapping keys to values; if a key does occur twice the
    later value wins.

    Raises ValueError for an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Compare one-character slices instead of v[0] / v[-1] so that
        # an empty value ("key=") does not raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
848
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma, and a backslash inside a quoted-string escapes
    the character that follows it.

    Returns the list of elements with surrounding whitespace stripped.
    Element text, including quotes and backslashes, is otherwise
    returned unchanged.  Empty elements (for example from a doubled or
    trailing comma) are dropped, as the RFC's list rule allows.

    Raises ValueError if the string ends inside a quoted-string.
    """
    elements = []
    start = 0       # index at which the current element begins
    inquote = 0     # true while scanning inside a quoted-string
    escaped = 0     # true when the previous character was a backslash
                    # inside a quoted-string
    for i in range(len(s)):
        ch = s[i]
        if escaped:
            # this character is quoted by the backslash; take it as-is
            escaped = 0
        elif inquote:
            if ch == '\\':
                escaped = 1
            elif ch == '"':
                inquote = 0
        elif ch == '"':
            inquote = 1
        elif ch == ',':
            # an unquoted comma closes the current element
            elements.append(s[start:i])
            start = i + 1
    if inquote:
        raise ValueError("unbalanced quotes")
    elements.append(s[start:])

    stripped = [elt.strip() for elt in elements]
    return [elt for elt in stripped if elt]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000894
class FileHandler(BaseHandler):
    """Open file: URLs from the local file system or via FTP."""

    # Addresses that count as the local host.  Filled in on first use
    # by get_names() and stored on the class itself.
    names = None

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Dispatch on the selector: '//x...' goes to FTP, the rest is local.

        A selector that starts with exactly two slashes has the
        request's type switched to 'ftp' and is re-opened through the
        parent opener.  Anything else is opened as a local file.
        """
        selector = req.get_selector()
        if selector[:2] != '//' or selector[2:3] == '/':
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    def get_names(self):
        """Return the tuple of addresses treated as this machine."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own_address = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the file named by the request's selector.

        The file is opened when the request has no host, or when the
        host carries no port and resolves to one of get_names().  The
        returned addinfourl has a Content-Type header guessed from the
        selector (text/plain if no guess is available).

        Raises URLError('file not on local host') otherwise.
        """
        guessed = mimetypes.guess_type(req.get_selector())[0]
        header_text = 'Content-Type: %s\n' % (guessed or 'text/plain')
        headers = mimetools.Message(StringIO(header_text))
        host = req.get_host()
        path = req.get_selector()
        is_local = not host
        if host:
            host, port = splitport(host)
            is_local = not host or \
                       (not port and
                        socket.gethostbyname(host) in self.get_names())
        if not is_local:
            raise URLError('file not on local host')
        return addinfourl(open(url2pathname(path), 'rb'),
                          headers, 'file:' + path)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000927
928class FTPHandler(BaseHandler):
929 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000930 host = req.get_host()
931 if not host:
932 raise IOError, ('ftp error', 'no host given')
933 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000934 try:
935 host = socket.gethostbyname(host)
936 except socket.error, msg:
937 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000938 host, port = splitport(host)
939 if port is None:
940 port = ftplib.FTP_PORT
941 path, attrs = splitattr(req.get_selector())
942 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000943 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000944 dirs, file = dirs[:-1], dirs[-1]
945 if dirs and not dirs[0]:
946 dirs = dirs[1:]
947 user = passwd = '' # XXX
948 try:
949 fw = self.connect_ftp(user, passwd, host, port, dirs)
950 type = file and 'I' or 'D'
951 for attr in attrs:
952 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000953 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000954 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000955 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000956 fp, retrlen = fw.retrfile(file, type)
957 if retrlen is not None and retrlen >= 0:
958 sf = StringIO('Content-Length: %d\n' % retrlen)
959 headers = mimetools.Message(sf)
960 else:
961 headers = noheaders()
962 return addinfourl(fp, headers, req.get_full_url())
963 except ftplib.all_errors, msg:
964 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000965
966 def connect_ftp(self, user, passwd, host, port, dirs):
967 fw = ftpwrapper(user, passwd, host, port, dirs)
968## fw.ftp.set_debuglevel(1)
969 return fw
970
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}       # (user, passwd, host, port) -> ftpwrapper
        self.timeout = {}     # same key -> expiry time (time.time() based)
        self.soonest = 0      # smallest expiry time in self.timeout
        self.delay = 60       # seconds added to "now" to get an expiry time
        self.max_conns = 16   # cache size at which one entry is dropped

    def setTimeout(self, t):
        """Set the number of seconds a connection stays cached."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size that triggers dropping an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for the login, creating it if needed.

        The entry's expiry time is refreshed on every call and the
        cache is then pruned by check_cache().
        """
        key = user, passwd, host, port
        if not self.cache.has_key(key):
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        # Keep our own reference: check_cache() may drop this very
        # entry, and looking it up again afterwards would raise KeyError.
        fw = self.cache[key]
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return fw

    def check_cache(self):
        """Close expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    # NOTE(review): unlike the expiry pass above, the
                    # dropped connection is not close()d here -- confirm
                    # whether that is intentional.
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute self.soonest; 0 when nothing is cached."""
        # min() of an empty sequence raises ValueError, so guard it.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001016
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the request's selector (and query, if any) to its host.

        Returns an addinfourl with empty headers.  Raises GopherError
        when the request has no host.
        """
        raw_host = req.get_host()
        if not raw_host:
            raise GopherError('no host given')
        host = unquote(raw_host)
        # the gopher type is split off the selector and not used further
        gopher_type, remainder = splitgophertype(req.get_selector())
        path, query = splitquery(remainder)
        path = unquote(path)
        if not query:
            fp = gopherlib.send_selector(path, host)
        else:
            fp = gopherlib.send_query(path, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001033
1034#bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder for opener objects (see build_opener)."""

    # Handler classes listed as defaults; build_opener() does not
    # install them yet.
    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Append ph to this instance's proxy handlers."""
        # builds a new list so the class-level list is left untouched
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Append h to this instance's handlers."""
        # builds a new list so the class-level list is left untouched
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented; does nothing."""
        pass

    def build_opener(self):
        """Return an OpenerDirector holding the proxy handlers.

        Entries of proxy_handlers that are classes are instantiated
        first.  Only proxy handlers are installed.
        NOTE(review): default_handlers and handlers are never added --
        presumably still to be written; confirm before relying on this.
        """
        # The class is OpenerDirector; "OpenerDirectory" is not defined
        # and raised NameError.
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if type(ph) == types.ClassType:
                ph = ph()
            opener.add_handler(ph)
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001059
# Manual test driver: tries to open a fixed list of URLs and prints the
# number of bytes read, or the error, for each one.
if __name__ == "__main__":
    # XXX some of the test code depends on machine configurations that
    # are internal to CNRI.   Need to set up a public server with the
    # right authentication configuration for test purposes.
    # localhost stays None unless this machine is one of the two known
    # test hosts.
    if socket.gethostname() == 'bitdiddle':
        localhost = 'bitdiddle.cnri.reston.va.us'
    elif socket.gethostname() == 'bitdiddle.concentric.net':
        localhost = 'localhost'
    else:
        localhost = None
    # Each entry is either a URL string or a (URL, POST data) tuple.
    urls = [
        # Thanks to Fred for finding these!
        'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
        'gopher://gopher.vt.edu:10010/10/33',

        'file:/etc/passwd',
        'file://nonsensename/etc/passwd',
        'ftp://www.python.org/pub/tmp/httplib.py',
        'ftp://www.python.org/pub/tmp/imageop.c',
        'ftp://www.python.org/pub/tmp/blat',
        'http://www.espn.com/', # redirect
        'http://www.python.org/Spanish/Inquistion/',
        ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
         'query=pythonistas&querytype=simple&casefold=yes&req=search'),
        'http://www.python.org/',
        'ftp://prep.ai.mit.edu/welcome.msg',
        'ftp://www.python.org/pub/tmp/figure.prn',
        'ftp://www.python.org/pub/tmp/interp.pl',
        'http://checkproxy.cnri.reston.va.us/test/test.html',
        ]

    if localhost is not None:
        # extra URLs that only make sense against the known test hosts
        urls = urls + [
            'file://%s/etc/passwd' % localhost,
            'http://%s/simple/' % localhost,
            'http://%s/digest/' % localhost,
            'http://%s/not/found.h' % localhost,
            ]

        # NOTE(review): the auth handlers are assumed to belong inside
        # this "if" (they are registered for localhost) -- confirm
        # against the original indentation.
        bauth = HTTPBasicAuthHandler()
        bauth.add_password('basic_test_realm', localhost, 'jhylton',
                           'password')
        dauth = HTTPDigestAuthHandler()
        dauth.add_password('digest_test_realm', localhost, 'jhylton',
                           'password')


    cfh = CacheFTPHandler()
    cfh.setTimeout(1)

    # XXX try out some custom proxy objects too!
    def at_cnri(req):
        # Prints the request's host and returns 1 when it ends in
        # '.cnri.reston.va.us'; otherwise falls off the end (None).
        host = req.get_host()
        print host
        if host[-18:] == '.cnri.reston.va.us':
            return 1
    p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
    ph = CustomProxyHandler(p)

    # Disabled: with this line commented out, none of the handlers
    # created above is installed by this script.
    #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))

    for url in urls:
        # a tuple entry carries the data to send along with the URL
        if type(url) == types.TupleType:
            url, req = url
        else:
            req = None
        print url
        try:
            f = urlopen(url, req)
        except IOError, err:
            print "IOError:", err
        except socket.error, err:
            print "socket.error:", err
        else:
            buf = f.read()
            f.close()
            print "read %d bytes" % len(buf)
        print
        time.sleep(0.1)