blob: 992c83b5a1d99e42b27b5a5d7f4d104e0f207aad [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# what the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
91import UserDict
92import httplib
93import re
94import base64
95import types
96import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000097import md5
98import mimetypes
99import mimetools
100import ftplib
101import sys
102import time
103import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000104import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105
106try:
107 from cStringIO import StringIO
108except ImportError:
109 from StringIO import StringIO
110
111try:
112 import sha
113except ImportError:
114 # need 1.5.2 final
115 sha = None
116
117# not sure how many of these need to be gotten rid of
118from urllib import unwrap, unquote, splittype, splithost, \
119 addinfourl, splitport, splitgophertype, splitquery, \
120 splitattr, ftpwrapper, noheaders
121
122# support for proxies via environment variables
123from urllib import getproxies
124
125# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000126from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000127
128__version__ = "2.0a1"
129
_opener = None
def urlopen(url, data=None):
    """Open url with the module-wide opener and return its result.

    The shared opener is created with build_opener() the first time it
    is needed, unless install_opener() has already supplied one.  url
    and data are passed unchanged to the opener's open() method.
    """
    global _opener
    if _opener is None:
        # Lazily create the default opener on first use.
        _opener = build_opener()
    opener = _opener
    return opener.open(url, data)
136
def install_opener(opener):
    """Make opener the module-wide opener that urlopen() will use."""
    global _opener
    _opener = opener
140
141# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000142# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000143 # subtypes.
144
class URLError(IOError):
    """Error raised by this module; a sub-type of IOError.

    None of the IOError implementation is shared: __init__ does not
    call the base class and only records the reason, and __str__ is
    overridden to format it.
    """

    def __init__(self, reason):
        # Deliberately does not chain to IOError.__init__.
        self.reason = reason

    def __str__(self):
        template = '<urlopen error %s>'
        return template % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000153
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    url  -- URL of the failed request (also stored as `filename`)
    code -- HTTP status code
    msg  -- reason phrase or other message text
    hdrs -- response headers
    fp   -- file-like object holding the response body, or None
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url
        # addinfourl wraps a real file object; when there is none the
        # simplest workaround is to leave that base class
        # uninitialised so the error can still be raised and printed.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        # getattr() guards against __init__ having failed before fp
        # was stored.
        fp = getattr(self, 'fp', None)
        if fp:
            fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000175
class GopherError(URLError):
    """URLError subclass for gopher failures; adds no behaviour."""
178
Moshe Zadka8a18e992001-03-01 08:40:42 +0000179
class Request:
    """Hold the state of one request: URL, optional data and headers.

    The URL is split lazily.  get_type() and get_host() parse it on
    first use and cache the pieces; the leftover text of each split is
    kept in the private __r_type / __r_host attributes.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.data = data
        # Filled in on demand by get_type() / get_host().
        self.type = None
        self.host = None
        self.port = None
        # Copy the caller's mapping so the shared default is never
        # mutated.
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr[:len(prefix)] == prefix:
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                # Running the getter is expected to set the missing
                # attribute, after which the lookup is retried.
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach data to the request."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached."""
        return self.data is not None

    def get_data(self):
        """Return the attached data (None if there is none)."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL given to the constructor."""
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError if there is none."""
        if self.type is not None:
            return self.type
        self.type, self.__r_type = splittype(self.__original)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host, or the full URL once proxied."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request via proxy host, using scheme type.

        The selector becomes the full original URL.
        """
        self.host = host
        self.type = type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000241
class OpenerDirector:
    """Dispatch requests and errors to handlers by method name.

    Handlers are indexed by the names of their methods:
    <protocol>_open methods go into handle_open, and
    <protocol>_error_<kind> methods go into handle_error.
    """

    def __init__(self):
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.addheaders = [('User-agent',
                            "Python-urllib/%s" % __version__)]

    def add_handler(self, handler):
        """Index handler under every protocol and error kind it serves.

        A handler with no matching method names is ignored.
        """
        registered = 0
        for meth in get_methods(handler):
            if meth[-5:] == '_open':
                protocol = meth[:-5]
                self.handle_open.setdefault(protocol, []).append(handler)
                registered = 1
                continue
            # Error methods look like <proto>_error_<kind>; anything
            # after the second underscore belongs to <kind>.
            pieces = meth.split('_', 2)
            if len(pieces) != 3 or pieces[1] != 'error':
                continue
            proto, kind = pieces[0], pieces[2]
            try:
                # Numeric kinds (HTTP status codes) are keyed as ints.
                kind = int(kind)
            except ValueError:
                pass
            by_kind = self.handle_error.setdefault(proto, {})
            by_kind.setdefault(kind, []).append(handler)
            registered = 1
        if registered:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every handler and drop the handler list."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        for handler in chain.get(kind, ()):
            result = getattr(handler, meth_name)(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the result.

        default_open handlers are tried first, then the handlers for
        the request's protocol, then unknown_open handlers.
        """
        # accept a URL or a Request object
        if type(fullurl) == types.StringType:
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        response = self._call_chain(self.handle_open, 'default',
                                    'default_open', req)
        if not response:
            scheme = req.get_type()
            response = self._call_chain(self.handle_open, scheme,
                                        scheme + '_open', req)
        if not response:
            response = self._call_chain(self.handle_open, 'unknown',
                                        'unknown_open', req)
        return response

    def error(self, proto, *args):
        """Pass an error for proto to the matching error handlers.

        For http and https the status code is taken from args[2] and
        http_error_<code> handlers are tried, followed by
        http_error_default handlers.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special cased
            # https is not different than http
            table = self.handle_error['http']
            code = args[2] # YUCK!
            meth_name = 'http_error_%d' % code
            result = self._call_chain(table, code, meth_name, *args)
            if result:
                return result
            return self._call_chain(table, 'default',
                                    'http_error_default', *args)

        result = self._call_chain(self.handle_error, proto,
                                  proto + '_error', *args)
        if result:
            return result
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000346
def is_callable(obj):
    """Return true if obj is one of the callable kinds this module uses.

    Builtin functions and methods, lambdas/functions and methods
    qualify, as do instances that have a __call__ attribute.
    """
    # not quite like builtin callable (which I didn't know existed),
    # not entirely sure it needs to be different
    obj_type = type(obj)
    for known in (types.BuiltinFunctionType, types.BuiltinMethodType,
                  types.LambdaType, types.MethodType):
        if obj_type == known:
            return 1
    if obj_type != types.InstanceType:
        return 0
    return hasattr(obj, '__call__')
357
def get_methods(inst):
    """Return the names of the methods available on inst.

    The instance's class and all of its base classes are searched for
    unbound methods; callable attributes found on the instance itself
    are added as well.
    """
    found = {}
    pending = [inst.__class__]
    while pending:
        klass = pending.pop(0)
        # Queue the bases so inherited methods are seen too.
        pending.extend(list(klass.__bases__))
        for name in dir(klass):
            if type(getattr(klass, name)) == types.UnboundMethodType:
                found[name] = 1
    for name in dir(inst):
        if is_callable(getattr(inst, name)):
            found[name] = 1
    return found.keys()
374
375# XXX probably also want an abstract factory that knows things like
376 # the fact that a ProxyHandler needs to get inserted first.
377# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000378 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000379
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Keep only the defaults that no argument overrides.  Each default
    # is judged exactly once, so several arguments overriding the same
    # default cannot trigger a second removal of that class.
    remaining = []
    for klass in default_classes:
        overridden = 0
        for check in handlers:
            if type(check) == types.ClassType:
                if issubclass(check, klass):
                    overridden = 1
            elif type(check) == types.InstanceType:
                if isinstance(check, klass):
                    overridden = 1
            if overridden:
                break
        if not overridden:
            remaining.append(klass)

    for klass in remaining:
        opener.add_handler(klass())

    for h in handlers:
        if type(h) == types.ClassType:
            h = h()
        opener.add_handler(h)
    return opener
417
class BaseHandler:
    """Base class for handlers; tracks the opener that owns them."""

    def add_parent(self, parent):
        """Record parent as the opener this handler belongs to."""
        self.parent = parent

    def close(self):
        """Forget the parent opener."""
        self.parent = None
423
class HTTPDefaultErrorHandler(BaseHandler):
    """Turn any HTTP error into a raised HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000427
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects."""

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request at the redirect target.

        Returns None when the response names no target ('location' or
        'uri' header).  Raises HTTPError when the target was already
        visited or more than ten redirects have been followed.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return

        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               req.error_302_dict.has_key(newurl):
                # HTTPError takes the response file object as its last
                # argument, so fp must still be open here.
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't drain and close fp until we are sure it will not be
        # handed to an HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"
464
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per URL scheme."""

    def __init__(self, proxies=None):
        """proxies maps a URL scheme to a proxy URL.

        When omitted, the mapping comes from getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # One <scheme>_open attribute per configured proxy.  The
            # values are bound as lambda defaults so that each lambda
            # keeps its own proxy.
            opener = lambda r, proxy=proxy_url, type=scheme, \
                     meth=self.proxy_open: meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy.

        Returns None when the proxy uses the request's own scheme, so
        that later handlers open it; otherwise re-opens the request
        through the parent opener.
        """
        orig_type = req.get_type()
        proxy_type, rest = splittype(proxy)
        host, XXX = splithost(rest)
        if '@' in host:
            # user:password@host -- send the credentials along.
            credentials, host = host.split('@', 1)
            credentials = base64.encodestring(unquote(credentials)).strip()
            req.add_header('Proxy-Authorization', 'Basic '+credentials)
        host = unquote(host)
        req.set_proxy(host, proxy_type)
        if orig_type != proxy_type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000495
496# feature suggested by Duncan Booth
497# XXX custom is not a good name
class CustomProxy:
    """Describe a proxy that applies only to requests a predicate accepts."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000511
class CustomProxyHandler(BaseHandler):
    """Pick a proxy per request by asking registered CustomProxy objects."""

    def __init__(self, *proxies):
        """Register each CustomProxy-like object given in proxies."""
        self.proxies = {}
        # Constructor arguments are registered rather than discarded.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Re-open req through the first registered proxy accepting it.

        Returns None when no proxy is registered for the request's
        protocol or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Request.set_proxy() needs the scheme as well as the
                # address; the proxy is registered under proto.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Open req through the parent opener; p is not used."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its proto attribute."""
        if self.proxies.has_key(cpo.proto):
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000537
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm at uri.

        uri may be a single URI string or a sequence of them.
        """
        # uri could be a single URI or a sequence
        if type(uri) == types.StringType:
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if not self.passwd.has_key(realm):
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, passwd) registered for realm at or above authuri.

        Returns (None, None) when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        # Compare whole path segments: '/foo' covers '/foo/bar' but not
        # '/foobar', so credentials are not offered to a sibling path
        # that merely shares leading characters.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        if test[1][:len(prefix)] == prefix:
            return 1
        return 0
Tim Peterse1190062001-01-15 03:34:38 +0000581
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000582
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; if no user is found, retry with realm None."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
590
591
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Users of this class supply a `header` attribute naming the request
    header that carries the credentials, and a `parent` opener through
    which the request is retried.
    """

    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """password_mgr stores the credentials; defaults to HTTPPasswordMgr()."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.__current_realm = None
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  must be careful to set it to None on successful
        # return.

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq names the response header holding the challenge.
        Returns the retried response, or None when the header is
        missing, is not a Basic challenge, or no password is known.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with credentials for realm at host.

        Returns None when a retry is already in progress or when no
        password is stored for the realm.
        """
        if self.__current_realm is None:
            self.__current_realm = realm
        else:
            # Challenged again while our retry is still in flight: the
            # credentials were refused, so do not loop.
            self.__current_realm = realm
            return None
        try:
            user, pw = self.passwd.find_user_password(realm, host)
            if pw:
                raw = "%s:%s" % (user, pw)
                auth = base64.encodestring(raw).strip()
                req.add_header(self.header, 'Basic %s' % auth)
                return self.parent.open(req)
            return None
        finally:
            # Always release the in-progress marker, even when the
            # lookup or the retry raises; otherwise every later
            # challenge would be refused.
            self.__current_realm = None
638
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using Basic authentication."""

    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Credentials are looked up by the netloc of the requested URL.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', netloc,
                                          req, headers)
647
648
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using Basic authentication."""

    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # Credentials are looked up by the request's host.
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', proxy_host,
                                          req, headers)
657
658
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Users of this class supply a `header` attribute naming the request
    header that carries the credentials, and a `parent` opener through
    which the request is retried.
    """

    def __init__(self, passwd=None):
        """passwd stores the credentials; defaults to HTTPPasswordMgr()."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Non-None while a retry for that realm is in progress.
        self.__current_realm = None

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Digest credentials if the challenge asks for them.

        authreq names the response header holding the challenge.
        Returns the retried response, or None when there is no usable
        Digest challenge or no credentials are available.
        """
        # The challenge arrives in the response header named by
        # authreq; self.header is the request header the answer is
        # sent in.
        authreq = headers.get(authreq, None)
        if authreq:
            words = authreq.split()
            # Compare the scheme case-insensitively, as the Basic
            # handler does.
            if words and words[0].lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        """Re-open req with a Digest response to the challenge in auth.

        Returns None when the challenge carries no parameters or no
        authorization could be computed.
        """
        parts = auth.split(None, 1)
        if len(parts) != 2:
            # A bare scheme name with no parameters cannot be answered.
            return None
        challenge = parts[1]
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            req.add_header(self.header, 'Digest %s' % auth)
            try:
                resp = self.parent.open(req)
            finally:
                # Release the in-progress marker even when the retry
                # raises, so later challenges are still answered.
                self.__current_realm = None
            return resp

    def get_authorization(self, req, chal):
        """Return the Digest credentials text answering chal.

        chal is a mapping of challenge parameters.  Returns None when
        the challenge lacks realm or nonce, a retry is already in
        progress, the algorithm is unsupported, or no user is known.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        if self.__current_realm is None:
            self.__current_realm = realm
        else:
            # Challenged again while our retry is still in flight: the
            # credentials were refused, so do not loop.
            self.__current_realm = realm
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            self.__current_realm = None
            return None

        user, pw = self.passwd.find_user_password(realm,
                                                  req.get_full_url())
        if user is None:
            # Nothing will be sent, so no retry is in progress.
            self.__current_realm = None
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        Returns (None, None) when the algorithm is not supported.
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x: encode_digest(md5.new(x).digest())
        elif algorithm == 'SHA' and sha is not None:
            # sha is None when the module could not be imported.
            H = lambda x: encode_digest(sha.new(x).digest())
        # XXX MD5-sess
        if H is None:
            return None, None
        KD = lambda s, d, H=H: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000748
Moshe Zadka8a18e992001-03-01 08:40:42 +0000749
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Header name consumed by the inherited digest code (presumably the
    # request header the credentials are sent in -- confirm against
    # AbstractDigestAuthHandler).
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 response by delegating to http_error_auth_reqed.

        The host is taken from the network-location part of the
        request's full URL.  The result of the delegated call is
        returned to the caller; it used to be dropped, so this method
        always returned None.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
762
763
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP status 407)."""

    # Header name consumed by the inherited digest code (presumably the
    # request header the credentials are sent in -- confirm against
    # AbstractDigestAuthHandler).
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 response by delegating to http_error_auth_reqed.

        The host is the request's own host.  The result of the
        delegated call is returned to the caller; it used to be
        dropped, so this method always returned None.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
771
772
def encode_digest(digest):
    """Return the lowercase hexadecimal encoding of a raw digest string.

    Every character of digest contributes two hex digits: the high
    nibble of its ordinal first, then the low nibble.
    """
    hexdigits = '0123456789abcdef'
    pieces = []
    for ch in digest:
        value = ord(ch)
        pieces.append(hexdigits[(value >> 4) & 0xf] + hexdigits[value & 0xf])
    return ''.join(pieces)
Tim Peterse1190062001-01-15 03:34:38 +0000781
782
Moshe Zadka8a18e992001-03-01 08:40:42 +0000783class AbstractHTTPHandler(BaseHandler):
784
785 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000786 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000787 if not host:
788 raise URLError('no host given')
789
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000790 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000791 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000792 if req.has_data():
793 data = req.get_data()
794 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000795 if not req.headers.has_key('Content-type'):
796 h.putheader('Content-type',
797 'application/x-www-form-urlencoded')
798 if not req.headers.has_key('Content-length'):
799 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000800 else:
801 h.putrequest('GET', req.get_selector())
802 except socket.error, err:
803 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000804
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000805 h.putheader('Host', host)
806 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000807 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000808 for k, v in req.headers.items():
809 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000810 h.endheaders()
811 if req.has_data():
812 h.send(data + '\r\n')
813
814 code, msg, hdrs = h.getreply()
815 fp = h.getfile()
816 if code == 200:
817 return addinfourl(fp, hdrs, req.get_full_url())
818 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000819 return self.parent.error('http', req, fp, code, msg, hdrs)
820
Moshe Zadka8a18e992001-03-01 08:40:42 +0000821
class HTTPHandler(AbstractHTTPHandler):
    """Handler for the http scheme."""

    def http_open(self, req):
        """Open req using httplib.HTTP through the inherited do_open."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
826
827
# The https handler is only defined when httplib provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for the https scheme."""

        def https_open(self, req):
            """Open req using httplib.HTTPS through the inherited do_open."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
833
834
class UnknownHandler(BaseHandler):
    """Handler that rejects requests by raising URLError."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
839
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value that both starts
    and ends with a double quote has those quotes removed.  An empty
    value ("key=") is stored as the empty string.

    Returns a dictionary mapping keys to values.  Raises ValueError for
    an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than v[0] / v[-1]: indexing raised IndexError
        # when the value was empty.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
849
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma, and such a comma does not separate elements.

    Returns a list of the elements with surrounding whitespace
    stripped; quotes are kept as part of the element.  Nothing is
    added for an empty remainder after the final comma.  Raises
    ValueError if a quoted-string containing a comma is never closed.
    """
    # The previous implementation did not advance the element start
    # after an unquoted element, so 'a=1, b=2' produced
    # ['a=1', 'a=1, b=2'].  A single left-to-right scan avoids that
    # bookkeeping.
    elements = []
    start = 0               # index where the current element begins
    inquote = 0
    comma_in_quote = 0      # saw a comma since the last opening quote
    for i in range(len(s)):
        ch = s[i]
        if ch == '"':
            inquote = not inquote
            comma_in_quote = 0
        elif ch == ',':
            if inquote:
                comma_in_quote = 1
            else:
                elements.append(s[start:i])
                start = i + 1
    if inquote and comma_in_quote:
        raise ValueError("unbalanced quotes")
    if start < len(s):
        elements.append(s[start:])
    return [elt.strip() for elt in elements]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000895
class FileHandler(BaseHandler):
    """Handler for file: URLs."""

    def file_open(self, req):
        """Use local file or FTP depending on form of URL.

        A selector that begins with exactly two slashes is re-dispatched
        through the parent opener as an ftp request; anything else is
        opened as a local file.
        """
        selector = req.get_selector()
        looks_remote = selector[:2] == '//' and selector[2:3] != '/'
        if not looks_remote:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost; filled in on first use and shared through
    # the FileHandler class
    names = None

    def get_names(self):
        """Return the addresses of 'localhost' and of this machine's hostname."""
        if FileHandler.names is not None:
            return FileHandler.names
        loopback = socket.gethostbyname('localhost')
        own_address = socket.gethostbyname(socket.gethostname())
        FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the file named by req's selector on the local machine.

        The file counts as local when the request has no host, or when
        the host has no port and resolves to one of get_names().
        Otherwise URLError is raised.  The returned addinfourl carries a
        Content-Type header guessed from the selector, defaulting to
        text/plain.
        """
        mtype = mimetypes.guess_type(req.get_selector())[0]
        if not mtype:
            mtype = 'text/plain'
        headers = mimetools.Message(StringIO('Content-Type: %s\n' % mtype))
        host = req.get_host()
        path = req.get_selector()
        if host:
            host, port = splitport(host)
            is_local = not host or \
                (not port and socket.gethostbyname(host) in self.get_names())
        else:
            is_local = 1
        if not is_local:
            raise URLError('file not on local host')
        return addinfourl(open(url2pathname(path), 'rb'),
                          headers, 'file:' + path)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000928
929class FTPHandler(BaseHandler):
930 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000931 host = req.get_host()
932 if not host:
933 raise IOError, ('ftp error', 'no host given')
934 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000935 try:
936 host = socket.gethostbyname(host)
937 except socket.error, msg:
938 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000939 host, port = splitport(host)
940 if port is None:
941 port = ftplib.FTP_PORT
942 path, attrs = splitattr(req.get_selector())
943 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000944 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000945 dirs, file = dirs[:-1], dirs[-1]
946 if dirs and not dirs[0]:
947 dirs = dirs[1:]
948 user = passwd = '' # XXX
949 try:
950 fw = self.connect_ftp(user, passwd, host, port, dirs)
951 type = file and 'I' or 'D'
952 for attr in attrs:
953 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000954 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000955 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000956 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000957 fp, retrlen = fw.retrfile(file, type)
958 if retrlen is not None and retrlen >= 0:
959 sf = StringIO('Content-Length: %d\n' % retrlen)
960 headers = mimetools.Message(sf)
961 else:
962 headers = noheaders()
963 return addinfourl(fp, headers, req.get_full_url())
964 except ftplib.all_errors, msg:
965 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000966
967 def connect_ftp(self, user, passwd, host, port, dirs):
968 fw = ftpwrapper(user, passwd, host, port, dirs)
969## fw.ftp.set_debuglevel(1)
970 return fw
971
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse.

    Connections are keyed by (user, passwd, host, port).  Each entry
    has an expiry time that is pushed forward by the delay every time
    the entry is requested.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # earliest expiry in self.timeout, or 0
        self.delay = 60         # seconds added to "now" on each use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds an unused connection is kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest connection is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for this login/host, creating it if needed.

        The entry's expiry time is refreshed and check_cache() is run
        before the wrapper is returned.
        """
        key = user, passwd, host, port
        if self.cache.has_key(key):
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._reset_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    # close before dropping; the connection used to be
                    # discarded while still open
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._reset_soonest()

    def _reset_soonest(self):
        """Recompute self.soonest; 0 when no connections remain."""
        # min() of an empty sequence raises ValueError, which happened
        # whenever the last cached connection had just been removed.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001017
class GopherHandler(BaseHandler):
    """Handler for gopher: URLs."""

    def gopher_open(self, req):
        """Fetch req through gopherlib and wrap the result in addinfourl.

        The selector is split into gopher type, selector and optional
        query.  With a query gopherlib.send_query is used, otherwise
        gopherlib.send_selector.  Raises GopherError if the request has
        no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # the gopher type is split off but not otherwise used
        gtype, rest = splitgophertype(req.get_selector())
        rest, query = splitquery(rest)
        selector = unquote(rest)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001034
1035#bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder for openers.

    Only the proxy handlers are installed by build_opener();
    default_handlers, handlers and replacement_handlers are recorded
    but not used yet.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Add a proxy handler (class or instance) for this factory."""
        # rebinding on the instance keeps the class-level list unshared
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Record an additional handler for this factory."""
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented yet; does nothing."""
        pass

    def build_opener(self):
        """Create an OpenerDirector with the proxy handlers installed.

        Proxy handlers given as classes are instantiated first.  The
        new opener is returned.
        """
        # This used to instantiate "OpenerDirectory", a name that does
        # not exist, and then dropped the opener it had built.
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if type(ph) == types.ClassType:
                ph = ph()
            opener.add_handler(ph)
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001060
1061if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001062 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001063 # are internal to CNRI. Need to set up a public server with the
1064 # right authentication configuration for test purposes.
1065 if socket.gethostname() == 'bitdiddle':
1066 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001067 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001068 localhost = 'localhost'
1069 else:
1070 localhost = None
1071 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001072 # Thanks to Fred for finding these!
1073 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1074 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001075
Fred Drake13a2c272000-02-10 17:17:14 +00001076 'file:/etc/passwd',
1077 'file://nonsensename/etc/passwd',
1078 'ftp://www.python.org/pub/tmp/httplib.py',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001079 'ftp://www.python.org/pub/tmp/imageop.c',
1080 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001081 'http://www.espn.com/', # redirect
1082 'http://www.python.org/Spanish/Inquistion/',
1083 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1084 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1085 'http://www.python.org/',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001086 'ftp://prep.ai.mit.edu/welcome.msg',
1087 'ftp://www.python.org/pub/tmp/figure.prn',
1088 'ftp://www.python.org/pub/tmp/interp.pl',
Fred Drake13a2c272000-02-10 17:17:14 +00001089 'http://checkproxy.cnri.reston.va.us/test/test.html',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001090 ]
1091
1092 if localhost is not None:
1093 urls = urls + [
1094 'file://%s/etc/passwd' % localhost,
1095 'http://%s/simple/' % localhost,
1096 'http://%s/digest/' % localhost,
1097 'http://%s/not/found.h' % localhost,
1098 ]
1099
1100 bauth = HTTPBasicAuthHandler()
1101 bauth.add_password('basic_test_realm', localhost, 'jhylton',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001102 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001103 dauth = HTTPDigestAuthHandler()
1104 dauth.add_password('digest_test_realm', localhost, 'jhylton',
1105 'password')
1106
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001107
1108 cfh = CacheFTPHandler()
1109 cfh.setTimeout(1)
1110
1111 # XXX try out some custom proxy objects too!
1112 def at_cnri(req):
Fred Drake13a2c272000-02-10 17:17:14 +00001113 host = req.get_host()
1114 print host
1115 if host[-18:] == '.cnri.reston.va.us':
1116 return 1
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001117 p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1118 ph = CustomProxyHandler(p)
1119
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001120 #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001121
1122 for url in urls:
1123 if type(url) == types.TupleType:
1124 url, req = url
1125 else:
1126 req = None
1127 print url
1128 try:
1129 f = urlopen(url, req)
1130 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001131 print "IOError:", err
1132 except socket.error, err:
1133 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001134 else:
1135 buf = f.read()
1136 f.close()
1137 print "read %d bytes" % len(buf)
1138 print
1139 time.sleep(0.1)