blob: f9e6bf254f4332a39697105800f2eebc7b0a52f9 [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.

urlopen(url, data=None) -- basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  It accepts one or more Handlers
as arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.

install_opener -- installs a new opener as the default opener.

objects of interest:
OpenerDirector --

Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

exceptions:
URLError -- a subclass of IOError; individual protocols have their own
specific subclass

HTTPError -- also a valid HTTP response, so you can treat an HTTP error
as an exceptional event or a valid response

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib2

# set up authentication info
authinfo = urllib2.HTTPBasicAuthHandler()
authinfo.add_password('realm', 'host', 'username', 'password')

proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)

# install it
urllib2.install_opener(opener)

f = urllib2.urlopen('http://www.python.org/')


"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
92import re
93import base64
94import types
95import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000096import md5
97import mimetypes
98import mimetools
99import ftplib
100import sys
101import time
102import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000103import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104
105try:
106 from cStringIO import StringIO
107except ImportError:
108 from StringIO import StringIO
109
110try:
111 import sha
112except ImportError:
113 # need 1.5.2 final
114 sha = None
115
116# not sure how many of these need to be gotten rid of
117from urllib import unwrap, unquote, splittype, splithost, \
118 addinfourl, splitport, splitgophertype, splitquery, \
119 splitattr, ftpwrapper, noheaders
120
121# support for proxies via environment variables
122from urllib import getproxies
123
124# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000125from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000126
127__version__ = "2.0a1"
128
_opener = None


def urlopen(url, data=None):
    """Open url through the module-wide opener and return its result.

    url and data are handed unchanged to the opener's open() method.
    If the module-level _opener is still None, one is created with
    build_opener() and remembered for later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        # first use: build the shared opener lazily
        opener = _opener = build_opener()
    return opener.open(url, data)
135
def install_opener(opener):
    """Store opener in the module-level _opener global."""
    global _opener
    _opener = opener
139
140# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000141# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000142 # subtypes.
143
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of
    the implementation: __init__ and __str__ are overridden and the
    only state stored is the reason attribute.
    """

    def __init__(self, reason):
        # reason: whatever describes the failure (a message string or
        # another exception object); it is kept verbatim
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple so that a tuple-valued reason is
        # formatted as a whole instead of being unpacked by '%' (which
        # would raise TypeError for tuples of any length other than 1).
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000152
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        """Record the failed response.

        url  -- URL of the request (also stored as filename)
        code -- HTTP status code
        msg  -- message accompanying the status code
        hdrs -- response headers
        fp   -- file-like object holding the response body, or None
        """
        # Set our own attributes first so that __str__ and __del__ work
        # even if the addinfourl initialisation below fails.
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url
        # NOTE(review): the addinfourl base expects a real file object
        # (see the urllib reference); without one we simply skip
        # initialising it instead of crashing while building the error.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        # getattr: fp may be missing if __init__ never ran to completion
        fp = getattr(self, 'fp', None)
        if fp:
            fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000174
class GopherError(URLError):
    """URLError subclass for gopher failures; adds no behaviour."""
177
Moshe Zadka8a18e992001-03-01 08:40:42 +0000178
class Request:
    """State of one URL request: the URL, optional data and headers.

    The URL is split lazily: the type and host are only extracted when
    get_type() / get_host() are first called.  The private attributes
    __r_type and __r_host hold what is left of the URL after the type
    and the host, respectively, have been split off.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        # filled in on demand by get_type() / get_host(), or directly
        # by set_proxy()
        self.type = None
        self.host = None
        self.port = None
        self.data = data
        # copy the mapping, so add_header() never touches the caller's
        # dictionary (or the shared default argument)
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                # running the getter creates the missing attribute
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Replace the data attached to this request."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached (i.e. is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the attached data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL type, splitting it off on first use.

        Raises ValueError if the URL has no type.
        """
        if self.type is not None:
            return self.type
        self.type, self.__r_type = splittype(self.__original)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host, splitting it off on first use."""
        if self.host is not None:
            return self.host
        self.host, self.__r_host = splithost(self.__r_type)
        if self.host:
            self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return the part of the URL that follows the host."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request to host, using type as the URL type.

        The selector becomes the complete original URL.
        """
        self.host = host
        self.type = type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000240
class OpenerDirector:
    """Dispatch requests to a collection of registered handlers.

    Handlers are indexed by the names of their methods:

    <protocol>_open         -- stored in handle_open[protocol]
    <protocol>_error_<kind> -- stored in handle_error[protocol][kind];
                               kind is converted to an int when it is
                               numeric
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every protocol/error it implements.

        The handler is only added (and told about its parent) if at
        least one of its method names matches the conventions above.
        """
        added = 0
        for meth in get_methods(handler):
            if meth[-5:] == '_open':
                self.handle_open.setdefault(meth[:-5], []).append(handler)
                added = 1
                continue
            # Split into at most <proto>, 'error', <kind>.  Requiring
            # exactly three parts rejects names with fewer than two
            # underscores; the old index arithmetic mistook e.g.
            # 'errors' for an error handler.
            parts = meth.split('_', 2)
            if len(parts) == 3 and parts[1] == 'error':
                proto, kind = parts[0], parts[2]
                try:
                    kind = int(kind)
                except ValueError:
                    # non-numeric kinds such as 'default' stay strings
                    pass
                by_kind = self.handle_error.setdefault(proto, {})
                by_kind.setdefault(kind, []).append(handler)
                added = 1
        if added:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl, which is a URL string or a Request object.

        The 'default' handlers are tried first, then the handlers for
        the request's own type, and finally the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, types.StringType):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        type_ = req.get_type()
        result = self._call_chain(self.handle_open, type_,
                                  type_ + '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for it.

        For 'http' and 'https' the chain is selected by the status code
        found in args[2], with 'http_error_default' as a fallback; for
        other protocols the '<proto>_error' methods are called.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special cased
            # https is not different than http
            lookup = self.handle_error['http']
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%d' % proto
            http_err = 1
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        result = self._call_chain(lookup, proto, meth_name, *args)
        if result:
            return result

        if http_err:
            return self._call_chain(lookup, 'default',
                                    'http_error_default', *args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000345
def is_callable(obj):
    """Return a true value if obj is considered callable.

    Built-in functions and methods, lambdas/functions and methods
    count as callable (result 1); an instance counts if it has a
    __call__ attribute; everything else yields 0.
    """
    # not quite like builtin callable (which I didn't know existed),
    # not entirely sure it needs to be different
    callable_types = (types.BuiltinFunctionType, types.BuiltinMethodType,
                      types.LambdaType, types.MethodType)
    if type(obj) in callable_types:
        return 1
    if not isinstance(obj, types.InstanceType):
        return 0
    return hasattr(obj, '__call__')
356
def get_methods(inst):
    """Return the names of the methods and callables found on inst.

    The class of inst and all of its base classes are searched for
    unbound methods; attributes of the instance itself are included
    when is_callable() accepts them.
    """
    found = {}
    pending = [inst.__class__]
    while pending:
        klass = pending.pop(0)
        # queue the bases so the whole hierarchy gets visited
        pending.extend(list(klass.__bases__))
        for name in dir(klass):
            if isinstance(getattr(klass, name), types.UnboundMethodType):
                found[name] = 1
    for name in dir(inst):
        if is_callable(getattr(inst, name)):
            found[name] = 1
    return found.keys()
373
374# XXX probably also want an abstract factory that knows things like
375 # the fact that a ProxyHandler needs to get inserted first.
376# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000377 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000378
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    skip = []
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, types.ClassType):
                replaced = issubclass(check, klass)
            elif isinstance(check, types.InstanceType):
                replaced = isinstance(check, klass)
            else:
                replaced = 0
            if replaced:
                # One match is enough.  Recording klass once per
                # matching argument made the remove() below raise
                # ValueError when two arguments replaced one default.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isinstance(h, types.ClassType):
            h = h()
        opener.add_handler(h)
    return opener
416
class BaseHandler:
    """Common base of the handlers: keeps track of the parent opener."""

    def add_parent(self, parent):
        """Remember parent as the object this handler belongs to."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
422
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turns the response into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000426
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects."""

    # Prefix of the HTTPError message raised when a redirect loop is
    # detected.  (The first fragment needs its trailing space; without
    # it the text read "that wouldlead to".)
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request at the URL named by the response.

        The target is taken from the 'location' header, or from 'uri'
        if that is missing; with neither header nothing is done and
        None is returned.  Raises HTTPError when the target was already
        visited or more than 10 redirects have been recorded.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        # the new location may be relative to the current URL
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict) > 10 or \
               req.error_302_dict.has_key(newurl):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
466
class ProxyHandler(BaseHandler):
    """Send requests through the proxies given in a type -> URL mapping."""

    def __init__(self, proxies=None):
        """Create one '<type>_open' attribute per configured proxy.

        Without an argument the mapping is obtained from getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # bind the current values as defaults so each callable
            # keeps its own proxy URL and type
            opener = lambda r, proxy=url, type=scheme, \
                            meth=self.proxy_open: meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy.

        Returns None when the proxy's URL type equals the request's
        original type; otherwise the request is re-opened through the
        parent and that result is returned.
        """
        orig_type = req.get_type()
        type, rest = splittype(proxy)
        host, XXX = splithost(rest)
        if '@' in host:
            # user:password@host -- pass the credentials along in a
            # Proxy-Authorization header
            user_pass, host = host.split('@', 1)
            encoded = base64.encodestring(unquote(user_pass)).strip()
            req.add_header('Proxy-Authorization', 'Basic ' + encoded)
        req.set_proxy(unquote(host), type)
        if orig_type != type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000497
498# feature suggested by Duncan Booth
499# XXX custom is not a good name
class CustomProxy:
    """Pairs a proxy address with a test deciding when to use it."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func is set and func(req) is true, else None."""
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000513
class CustomProxyHandler(BaseHandler):
    """Route requests through CustomProxy objects, grouped by protocol."""

    def __init__(self, *proxies):
        """Register every CustomProxy passed to the constructor.

        (The arguments used to be silently discarded.)
        """
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Re-open req through the first proxy that accepts it.

        Returns None when no proxy is registered for the request's type
        or none of them accepts the request.
        """
        proto = req.get_type()
        for p in self.proxies.get(proto, []):
            if p.handle(req):
                # Request.set_proxy() takes (host, type); the one-argument
                # call made here before raised TypeError.
                # NOTE(review): the request's own type is reused as the
                # proxy type -- confirm this is the intended scheme.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Add cpo to the list of proxies for its protocol (cpo.proto)."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000538
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, passwd)}.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm and one or more URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, types.StringType):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        self.passwd.setdefault(realm, {})[uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) registered for realm and authuri.

        Returns (None, None) when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # no netloc was recognised: treat the path as the netloc
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        # Compare whole path segments.  A plain character-wise common
        # prefix would treat '/foobar' as being below '/foo' and hand
        # credentials to an unrelated path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        if test[1].startswith(prefix):
            return 1
        return 0
Tim Peterse1190062001-01-15 03:34:38 +0000582
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000583
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the entries stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first, then the None realm if no user is found."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
591
592
class AbstractBasicAuthHandler:
    """Retry logic for HTTP Basic authentication.

    The class reads self.header (the name of the request header that
    carries the credentials) and self.parent (used to re-open the
    request); neither is defined here.
    """

    # matches '<scheme> realm="<realm>"' at the start of a challenge
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  it is reset to None whenever a retry finishes.
        self.__current_realm = None

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req if the header named authreq holds a Basic challenge.

        Returns the result of the retry, or None when the header is
        missing, cannot be parsed, or names another scheme.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with the credentials registered for realm/host.

        Returns None when a retry is already in progress or no password
        is known; otherwise returns whatever the parent's open() returns.
        """
        if self.__current_realm is not None:
            # we are inside a retry already: the credentials were
            # refused, so do not try again
            self.__current_realm = realm
            return None
        self.__current_realm = realm
        try:
            user, pw = self.passwd.find_user_password(realm, host)
            if not pw:
                return None
            raw = "%s:%s" % (user, pw)
            # encodestring() breaks long output into lines; drop every
            # newline so the header value stays on a single line
            auth = base64.encodestring(raw).replace('\n', '')
            req.add_header(self.header, 'Basic %s' % auth)
            return self.parent.open(req)
        finally:
            # reset even when open() raises (e.g. after the server
            # refused the credentials); otherwise every later request
            # would be treated as a nested retry and never authenticate
            self.__current_realm = None
639
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using Basic authentication."""

    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # credentials are looked up by the network location of the URL
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
648
649
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using Basic authentication."""

    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # credentials are looked up by the request's host
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
658
659
660class AbstractDigestAuthHandler:
661
662 def __init__(self, passwd=None):
663 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000664 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000665 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000666 self.add_password = self.passwd.add_password
667 self.__current_realm = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000668
Moshe Zadka8a18e992001-03-01 08:40:42 +0000669 def http_error_auth_reqed(self, authreq, host, req, headers):
670 authreq = headers.get(self.header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000671 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000672 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000673 if kind == 'Digest':
674 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000675
676 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000677 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000678 chal = parse_keqv_list(parse_http_list(challenge))
679 auth = self.get_authorization(req, chal)
680 if auth:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000681 req.add_header(self.header, 'Digest %s' % auth)
Fred Drake13a2c272000-02-10 17:17:14 +0000682 resp = self.parent.open(req)
683 self.__current_realm = None
684 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000685
def get_authorization(self, req, chal):
    """Build the parameter string for a Digest authorization header.

    `chal` maps challenge parameter names to values; 'realm' and
    'nonce' are required, 'algorithm' defaults to 'MD5' and 'opaque'
    is optional.  Returns the comma-separated credentials string, or
    None when the challenge is incomplete, a realm was already being
    answered, the algorithm is unsupported, or no user is known for
    the realm and URL.
    """
    try:
        realm = chal['realm']
        nonce = chal['nonce']
    except KeyError:
        return None
    algorithm = chal.get('algorithm', 'MD5')
    # mod_digest doesn't send an opaque, even though it isn't
    # supposed to be optional
    opaque = chal.get('opaque', None)

    # Always record the realm of this challenge, but only answer when
    # no realm was recorded beforehand.
    previous_realm = self.__current_realm
    self.__current_realm = realm
    if previous_realm is not None:
        return None

    H, KD = self.get_algorithm_impls(algorithm)
    if H is None:
        return None

    user, pw = self.passwd.find_user_password(realm,
                                              req.get_full_url())
    if user is None:
        return None

    # XXX not implemented yet
    entdig = None
    if req.has_data():
        entdig = self.get_entity_digest(req.get_data(), chal)

    if req.has_data():
        method = 'POST'
    else:
        method = 'GET'
    A1 = "%s:%s:%s" % (user, realm, pw)
    # XXX selector: what about proxies and full urls
    A2 = "%s:%s" % (method, req.get_selector())
    respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
    # XXX should the partial digests be encoded too?

    fields = ['username="%s", realm="%s", nonce="%s", uri="%s", '
              'response="%s"' % (user, realm, nonce, req.get_selector(),
                                 respdig)]
    if opaque:
        fields.append('opaque="%s"' % opaque)
    if entdig:
        fields.append('digest="%s"' % entdig)
    if algorithm != 'MD5':
        fields.append('algorithm="%s"' % algorithm)
    return ', '.join(fields)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000735
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) function pair for the named digest algorithm.

    H(x) is the hex-encoded hash of the string x; KD(s, d) is
    H("s:d").  Only 'MD5' and 'SHA' are supported.  For any other
    algorithm name (None, None) is returned, which is the value
    get_authorization() tests for before giving up on the challenge.
    """
    # lambdas assume digest modules are imported at the top level
    if algorithm == 'MD5':
        H = lambda x, e=encode_digest: e(md5.new(x).digest())
    elif algorithm == 'SHA':
        H = lambda x, e=encode_digest: e(sha.new(x).digest())
    else:
        # XXX MD5-sess
        # Unknown algorithm: report "no implementation" instead of
        # failing on an unbound H below.
        return None, None
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000745
def get_entity_digest(self, data, chal):
    """Placeholder for the entity digest of `data`; always None.

    Both arguments are currently ignored.
    """
    # XXX not implemented yet
    entity_digest = None
    return entity_digest
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000749
Moshe Zadka8a18e992001-03-01 08:40:42 +0000750
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Request header that carries the credentials.
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 response using the 'www-authenticate' challenge.

        The host is the network-location part of the request's full
        URL.  Whatever http_error_auth_reqed() produces (the response
        of the retried request when credentials could be built) is
        returned to the caller rather than discarded.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
763
764
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP status 407)."""

    # Request header that carries the credentials.
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 response using the 'proxy-authenticate' challenge.

        Whatever http_error_auth_reqed() produces (the response of the
        retried request when credentials could be built) is returned
        to the caller rather than discarded.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
772
773
def encode_digest(digest):
    """Return `digest` as lowercase hex, two digits per character.

    Each character contributes its high nibble followed by its low
    nibble (only the low eight bits of its ordinal are used).
    """
    digits = '0123456789abcdef'
    pairs = []
    for ch in digest:
        value = ord(ch)
        pairs.append(digits[(value >> 4) & 0xf] + digits[value & 0xf])
    return ''.join(pairs)
Tim Peterse1190062001-01-15 03:34:38 +0000782
783
Moshe Zadka8a18e992001-03-01 08:40:42 +0000784class AbstractHTTPHandler(BaseHandler):
785
786 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000787 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000788 if not host:
789 raise URLError('no host given')
790
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000791 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000792 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000793 if req.has_data():
794 data = req.get_data()
795 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000796 if not req.headers.has_key('Content-type'):
797 h.putheader('Content-type',
798 'application/x-www-form-urlencoded')
799 if not req.headers.has_key('Content-length'):
800 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000801 else:
802 h.putrequest('GET', req.get_selector())
803 except socket.error, err:
804 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000805
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000806 h.putheader('Host', host)
807 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000808 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000809 for k, v in req.headers.items():
810 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000811 h.endheaders()
812 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000813 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000814
815 code, msg, hdrs = h.getreply()
816 fp = h.getfile()
817 if code == 200:
818 return addinfourl(fp, hdrs, req.get_full_url())
819 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000820 return self.parent.error('http', req, fp, code, msg, hdrs)
821
Moshe Zadka8a18e992001-03-01 08:40:42 +0000822
class HTTPHandler(AbstractHTTPHandler):
    """Opens http: requests through httplib.HTTP."""

    def http_open(self, req):
        """Delegate to do_open() with the plain HTTP connection class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
827
828
# Only defined when this httplib build provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Opens https: requests through httplib.HTTPS."""

        def https_open(self, req):
            """Delegate to do_open() with the HTTPS connection class."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
834
835
class UnknownHandler(BaseHandler):
    """Handler whose only job is to reject a request with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
840
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value enclosed in
    double quotes has the quotes removed; an empty value ("key=") is
    kept as the empty string.  Returns a dictionary mapping keys to
    values.  An element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Compare slices, not indexes, so an empty value does not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
850
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns a list of the elements with surrounding whitespace
    stripped.  Raises ValueError("unbalanced quotes") when a quoted
    string is still open and a comma, but no closing quote, remains.
    """
    # XXX this function could probably use more testing

    items = []
    end = len(s)
    i = 0           # position from which the next ',' / '"' is sought
    start = 0       # first character of the element being collected
    inquote = 0
    while i < end:
        c = s.find(',', i)
        q = s.find('"', i)
        if c == -1:
            # No separator left: the remainder is the last element.
            items.append(s[start:])
            break
        if q == -1:
            if inquote:
                raise ValueError("unbalanced quotes")
            # Plain element; the next element starts after the comma.
            items.append(s[start:c])
            i = start = c + 1
            continue
        if inquote:
            if q < c:
                # The closing quote precedes the comma, so the comma
                # really separates two elements.
                items.append(s[start:c])
                i = start = c + 1
                inquote = 0
            else:
                # The comma lies inside the quoted string; move to the
                # closing quote and look again from there.
                i = q
        else:
            if c < q:
                items.append(s[start:c])
                i = start = c + 1
            else:
                # An opening quote comes first: scan inside it.
                inquote = 1
                i = q + 1
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000896
class FileHandler(BaseHandler):
    """Handler for file: requests."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a local file, or re-dispatch the request as FTP.

        A selector that begins with exactly two slashes is re-typed
        as 'ftp' and handed back to the parent opener.
        """
        selector = req.get_selector()
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of 'localhost' and of this machine's
        host name, looked up once and cached on the class."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own_address = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return the file named by the selector, wrapped in addinfourl.

        The Content-Type header is guessed from the selector and
        defaults to text/plain.  Raises URLError when the request
        names a port, or a host that does not resolve to one of the
        addresses from get_names().
        """
        mtype = mimetypes.guess_type(req.get_selector())[0]
        content_type = 'Content-Type: %s\n' % (mtype or 'text/plain')
        headers = mimetools.Message(StringIO(content_type))
        host = req.get_host()
        file = req.get_selector()
        if host:
            host, port = splitport(host)
        if host:
            # A named host is acceptable only without a port and only
            # if it resolves to this machine.
            if port or socket.gethostbyname(host) not in self.get_names():
                raise URLError('file not on local host')
        return addinfourl(open(url2pathname(file), 'rb'),
                          headers, 'file:'+file)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000929
930class FTPHandler(BaseHandler):
931 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000932 host = req.get_host()
933 if not host:
934 raise IOError, ('ftp error', 'no host given')
935 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000936 try:
937 host = socket.gethostbyname(host)
938 except socket.error, msg:
939 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000940 host, port = splitport(host)
941 if port is None:
942 port = ftplib.FTP_PORT
943 path, attrs = splitattr(req.get_selector())
944 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000945 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000946 dirs, file = dirs[:-1], dirs[-1]
947 if dirs and not dirs[0]:
948 dirs = dirs[1:]
949 user = passwd = '' # XXX
950 try:
951 fw = self.connect_ftp(user, passwd, host, port, dirs)
952 type = file and 'I' or 'D'
953 for attr in attrs:
954 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000955 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000956 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000957 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000958 fp, retrlen = fw.retrfile(file, type)
959 if retrlen is not None and retrlen >= 0:
960 sf = StringIO('Content-Length: %d\n' % retrlen)
961 headers = mimetools.Message(sf)
962 else:
963 headers = noheaders()
964 return addinfourl(fp, headers, req.get_full_url())
965 except ftplib.all_errors, msg:
966 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000967
968 def connect_ftp(self, user, passwd, host, port, dirs):
969 fw = ftpwrapper(user, passwd, host, port, dirs)
970## fw.ftp.set_debuglevel(1)
971 return fw
972
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse.

    Connections are keyed by (user, passwd, host, port).  Each use
    pushes the entry's expiry `delay` seconds into the future; expired
    entries are closed, and the entry closest to expiry is closed and
    dropped once the cache holds `max_conns` connections.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # earliest expiry currently in the table
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds a connection stays cached after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which a connection is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for these parameters, creating
        one if necessary, and refresh its expiry time."""
        key = user, passwd, host, port
        if not self.cache.has_key(key):
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self._discard(k)
            self._reset_soonest()

        # then check the size
        if len(self.cache) >= self.max_conns:
            # Pick the entry with the earliest expiry from the table
            # itself; self.soonest may be stale because refreshing an
            # entry in connect_ftp() does not update it.
            oldest = None
            for k, v in self.timeout.items():
                if oldest is None or v < self.timeout[oldest]:
                    oldest = k
            if oldest is not None:
                self._discard(oldest)
            self._reset_soonest()

    def _discard(self, key):
        # Close a cached connection and forget it.
        self.cache[key].close()
        del self.cache[key]
        del self.timeout[key]

    def _reset_soonest(self):
        # Recompute the earliest expiry; an empty table falls back to
        # the initial value instead of calling min() on nothing.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001018
class GopherHandler(BaseHandler):
    """Handler for gopher: requests."""

    def gopher_open(self, req):
        """Send the request's selector (and query, if any) to its host.

        Returns the reply wrapped in addinfourl with empty headers.
        Raises GopherError when the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type is split off the selector and not used further.
        gtype, rest = splitgophertype(req.get_selector())
        selector, query = splitquery(rest)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001035
1036#bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder for OpenerDirector objects.

    NOTE: build_opener() only installs the proxy handlers;
    default_handlers, handlers and replacement_handlers are recorded
    but not consulted, and replace_handler() does nothing.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Register a proxy handler (class or instance)."""
        # Build a new list so the class-level default is not mutated.
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Register an additional handler."""
        # Build a new list so the class-level default is not mutated.
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented."""
        pass

    def build_opener(self):
        """Return a new OpenerDirector with the proxy handlers installed.

        Entries that are (classic) classes are instantiated first.
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if isinstance(ph, types.ClassType):
                ph = ph()
            opener.add_handler(ph)
        # Hand the opener back; otherwise it is built and discarded.
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001061
1062if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001063 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001064 # are internal to CNRI. Need to set up a public server with the
1065 # right authentication configuration for test purposes.
1066 if socket.gethostname() == 'bitdiddle':
1067 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001068 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001069 localhost = 'localhost'
1070 else:
1071 localhost = None
1072 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001073 # Thanks to Fred for finding these!
1074 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1075 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001076
Fred Drake13a2c272000-02-10 17:17:14 +00001077 'file:/etc/passwd',
1078 'file://nonsensename/etc/passwd',
1079 'ftp://www.python.org/pub/tmp/httplib.py',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001080 'ftp://www.python.org/pub/tmp/imageop.c',
1081 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001082 'http://www.espn.com/', # redirect
1083 'http://www.python.org/Spanish/Inquistion/',
1084 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1085 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1086 'http://www.python.org/',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001087 'ftp://prep.ai.mit.edu/welcome.msg',
1088 'ftp://www.python.org/pub/tmp/figure.prn',
1089 'ftp://www.python.org/pub/tmp/interp.pl',
Fred Drake13a2c272000-02-10 17:17:14 +00001090 'http://checkproxy.cnri.reston.va.us/test/test.html',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001091 ]
1092
1093 if localhost is not None:
1094 urls = urls + [
1095 'file://%s/etc/passwd' % localhost,
1096 'http://%s/simple/' % localhost,
1097 'http://%s/digest/' % localhost,
1098 'http://%s/not/found.h' % localhost,
1099 ]
1100
1101 bauth = HTTPBasicAuthHandler()
1102 bauth.add_password('basic_test_realm', localhost, 'jhylton',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001103 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001104 dauth = HTTPDigestAuthHandler()
1105 dauth.add_password('digest_test_realm', localhost, 'jhylton',
1106 'password')
1107
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001108
1109 cfh = CacheFTPHandler()
1110 cfh.setTimeout(1)
1111
1112 # XXX try out some custom proxy objects too!
1113 def at_cnri(req):
Fred Drake13a2c272000-02-10 17:17:14 +00001114 host = req.get_host()
1115 print host
1116 if host[-18:] == '.cnri.reston.va.us':
1117 return 1
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001118 p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1119 ph = CustomProxyHandler(p)
1120
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001121 #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001122
1123 for url in urls:
Jeremy Hyltond5d8fc52001-08-11 21:44:46 +00001124 if isinstance(url, types.TupleType):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001125 url, req = url
1126 else:
1127 req = None
1128 print url
1129 try:
1130 f = urlopen(url, req)
1131 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001132 print "IOError:", err
1133 except socket.error, err:
1134 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001135 else:
1136 buf = f.read()
1137 f.close()
1138 print "read %d bytes" % len(buf)
1139 print
1140 time.sleep(0.1)