blob: beb9fa5af074947b6694580f7de1a5f0d9be82d0 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  pass the url and optionally data to post to an HTTP URL, and
Tim Peterse1190062001-01-15 03:34:38 +000019get a file-like object back. One difference is that you can also pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000020a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  if one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
35Request -- an object that encapsulates the state of a request. the
state can be as simple as the URL.  it can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform
#     authentication but fails for some reason, how should the error be
#     signalled?  The client needs to know the HTTP error code.  But if
#     the handler knows what the problem was, e.g., that it didn't know
#     the hash algorithm requested in the challenge, it would be good to
#     pass that information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
92import re
93import base64
94import types
95import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000096import md5
97import mimetypes
98import mimetools
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +000099import rfc822
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000100import ftplib
101import sys
102import time
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000103import os
104import stat
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000106import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000107
108try:
109 from cStringIO import StringIO
110except ImportError:
111 from StringIO import StringIO
112
113try:
114 import sha
115except ImportError:
116 # need 1.5.2 final
117 sha = None
118
119# not sure how many of these need to be gotten rid of
120from urllib import unwrap, unquote, splittype, splithost, \
121 addinfourl, splitport, splitgophertype, splitquery, \
122 splitattr, ftpwrapper, noheaders
123
124# support for proxies via environment variables
125from urllib import getproxies
126
127# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000128from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000129
130__version__ = "2.0a1"
131
# Module-wide default opener.  It is created lazily by urlopen() or
# set explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module's default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    The default opener is built with build_opener() on first use,
    unless install_opener() has already supplied one.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
138
def install_opener(opener):
    """Make opener the module-wide default opener used by urlopen()."""
    global _opener
    _opener = opener
142
143# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000144# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000145 # subtypes.
146
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError so that callers catching IOError
    also catch it, but it shares none of IOError's implementation:
    __init__ and __str__ are both overridden.  The cause of the failure
    is stored on the 'reason' attribute.
    """

    def __init__(self, reason):
        # IOError.__init__ is deliberately not invoked.
        self.reason = reason

    def __str__(self):
        template = '<urlopen error %s>'
        return template % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000155
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    Instances are initialised through addinfourl with the response
    file object, headers and URL, so a caught HTTPError can be treated
    like a response object.  The status code, message, headers and
    file object are also exposed as code, msg, hdrs and fp.
    """

    def __init__(self, url, code, msg, hdrs, fp):
        # Only the addinfourl side is initialised; URLError.__init__
        # is not called.
        addinfourl.__init__(self, fp, hdrs, url)
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe? what if user catches exception, then
        # extracts fp and discards exception?
        response = self.fp
        if response:
            response.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000177
class GopherError(URLError):
    """Marker subclass of URLError; adds no behaviour of its own.

    Presumably raised by the gopher handler, which is not visible in
    this part of the file.
    """
180
Moshe Zadka8a18e992001-03-01 08:40:42 +0000181
class Request:
    """Encapsulates the state of one request: URL, data and headers.

    The URL is taken apart lazily.  get_type() splits off the URL type
    and get_host() the host; what is left over after each split is
    kept in the private attributes __r_type and __r_host.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        # type and host are filled in on demand; __r_type and __r_host
        # hold what is left after the matching split and do not exist
        # until get_type() / get_host() have run.
        self.type = None
        self.host = None
        self.port = None
        self.data = data
        # Copy the mapping so that neither the caller's dict nor the
        # shared default argument is ever modified by add_header().
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        #
        # Only reached for attributes that do not exist yet.  For a
        # (name-mangled) __r_<name> attribute, run get_<name>() so it
        # gets computed, then look it up again.
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach data to the request, replacing any earlier data."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached to the request."""
        return self.data is not None

    def get_data(self):
        """Return the attached data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL type; raise ValueError if the URL has none."""
        if self.type is None:
            url_type, remainder = splittype(self.__original)
            self.type, self.__r_type = url_type, remainder
            if url_type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the (unquoted) host part of the URL."""
        if self.host is None:
            host, self.__r_host = splithost(self.__r_type)
            self.host = host
            if host:
                self.host = unquote(host)
        return self.host

    def get_selector(self):
        """Return what is left of the URL after the host was split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request to a proxy at host, of the given type.

        The selector becomes the complete original URL.
        """
        self.host = host
        self.type = type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000243
class OpenerDirector:
    """Composite that routes a request through its registered handlers.

    Handlers are indexed by the names of their methods:

      <protocol>_open          -> self.handle_open[protocol]
      <protocol>_error_<kind>  -> self.handle_error[protocol][kind]

    <kind> is converted to an int when it looks like one and is kept
    as a string otherwise.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every *_open / *_error_* method it has.

        A handler without any such method is ignored.
        """
        added = 0
        for meth in get_methods(handler):
            if meth[-5:] == '_open':
                protocol = meth[:-5]
                self.handle_open.setdefault(protocol, []).append(handler)
                added = 1
                continue
            # '<proto>_error_<kind>': at most two splits, so <kind>
            # keeps any further underscores.
            pieces = meth.split('_', 2)
            if len(pieces) == 3 and pieces[1] == 'error':
                proto, kind = pieces[0], pieces[2]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(proto, {})
                lookup.setdefault(kind, []).append(handler)
                self.handle_error[proto] = lookup
                added = 1
        if added:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them all."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        #
        # The first handler returning something other than None wins.
        for handler in chain.get(kind, ()):
            result = getattr(handler, meth_name)(*args)
            if result is not None:
                return result
        return None

    def open(self, fullurl, data=None):
        """Open fullurl, which may be a URL string or a Request.

        The 'default' handlers are tried first, then the handlers for
        the URL's type, and finally the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, types.StringType):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        openers = self.handle_open
        result = self._call_chain(openers, 'default', 'default_open', req)
        if not result:
            type_ = req.get_type()
            result = self._call_chain(openers, type_, type_ + '_open', req)
            if not result:
                result = self._call_chain(openers, 'unknown',
                                          'unknown_open', req)
        return result

    def error(self, proto, *args):
        """Dispatch an error for protocol proto to the error handlers.

        For http and https the third positional argument (the status
        code) selects the http_error_<code> handlers, and the
        http_error_default handlers are the fallback.
        """
        is_http = proto in ['http', 'https']
        if is_http:
            # XXX http[s] protocols are special cased
            table = self.handle_error['http'] # https is no different from http
            code = args[2] # YUCK!
            lookup = (table, code, 'http_error_%d' % code)
        else:
            lookup = (self.handle_error, proto, proto + '_error')
        chain_args = lookup + args
        result = self._call_chain(*chain_args)
        if result:
            return result

        if is_http:
            chain_args = (table, 'default', 'http_error_default') + args
            return self._call_chain(*chain_args)
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000348
def is_callable(obj):
    """Return true if obj is a function, a method or a callable instance.

    Builtin functions and methods, Python functions (lambdas included)
    and methods yield 1.  An instance yields the result of
    hasattr(obj, '__call__').  Everything else yields 0.
    """
    # not quite like builtin callable (which I didn't know existed),
    # not entirely sure it needs to be different
    function_types = (types.BuiltinFunctionType, types.BuiltinMethodType,
                      types.LambdaType, types.MethodType)
    if type(obj) in function_types:
        return 1
    if not isinstance(obj, types.InstanceType):
        return 0
    return hasattr(obj, '__call__')
359
def get_methods(inst):
    """Return a list with the names of the methods of inst.

    The names come from two sources: unbound methods found on the
    class of inst and on all of its base classes, and attributes of
    inst itself for which is_callable() is true.
    """
    found = {}
    # Visit the class and then its bases, breadth first.
    pending = [inst.__class__]
    while pending:
        klass = pending.pop(0)
        pending.extend(list(klass.__bases__))
        for name in dir(klass):
            if isinstance(getattr(klass, name), types.UnboundMethodType):
                found[name] = 1
    for name in dir(inst):
        if is_callable(getattr(inst, name)):
            found[name] = 1
    return found.keys()
376
377# XXX probably also want an abstract factory that knows things like
378 # the fact that a ProxyHandler needs to get inserted first.
379# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000380 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000381
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument is either a handler instance or a handler class;
    classes are instantiated without arguments.  The default handlers
    are added first, followed by the arguments in the order given.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Find the defaults that are overridden by a supplied handler.
    skip = []
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, types.ClassType):
                overridden = issubclass(check, klass)
            elif isinstance(check, types.InstanceType):
                overridden = isinstance(check, klass)
            else:
                overridden = 0
            if overridden:
                # Record each default only once.  Several handlers may
                # derive from the same default, and removing it twice
                # below would raise ValueError.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isinstance(h, types.ClassType):
            h = h()
        opener.add_handler(h)
    return opener
419
class BaseHandler:
    """Base class for handlers; keeps a reference to the owning opener."""

    def add_parent(self, parent):
        """Remember parent, the opener this handler was added to."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent opener."""
        self.parent = None
425
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turns the response into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # The exception carries the response (fp and hdrs), so the
        # caller may still treat it as a valid response.
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000429
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects.

    Implementation note: To avoid the server sending us into an
    infinite loop, the request object needs to track what URLs we
    have already seen.  Do this by adding a handler-specific
    attribute (error_302_dict) to the Request object.
    """

    # Prefix of the HTTPError message raised for a redirect loop.  The
    # trailing space after "would" was missing, which ran the words
    # together as "wouldlead".
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request against the redirect target.

        Returns None when the response names no target, so that other
        handlers get a chance.  Raises HTTPError when the target has
        been visited before or when more than ten redirects have
        already been followed.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        # The target may be relative to the URL that was requested.
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               req.error_302_dict.has_key(newurl):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
469
class ProxyHandler(BaseHandler):
    """Send requests through the proxies given by a type -> URL mapping.

    For every URL type in the mapping the instance grows a
    '<type>_open' attribute that forwards to proxy_open().
    """

    def __init__(self, proxies=None):
        # Without an explicit mapping, use what getproxies() reports.
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # The defaults bind the current values to this lambda.
            opener = lambda r, proxy=proxy_url, type=scheme, \
                     meth=self.proxy_open: meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy.

        Returns None when the proxy's URL type equals the request's,
        and otherwise the result of re-opening the request.
        """
        orig_type = req.get_type()
        # The type passed in is replaced by the type of the proxy URL.
        type, rest = splittype(proxy)
        host, unused = splithost(rest)
        if '@' in host:
            # 'user:password@host': the credentials are sent to the
            # proxy as basic authentication.
            credentials, host = host.split('@', 1)
            credentials = base64.encodestring(unquote(credentials)).strip()
            req.add_header('Proxy-Authorization', 'Basic '+credentials)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type != type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000500
501# feature suggested by Duncan Booth
502# XXX custom is not a good name
class CustomProxy:
    """A proxy address for one protocol plus a test for using it.

    either pass a function to the constructor or override handle
    """

    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func was given and is true for req, else None."""
        predicate = self.func
        if not predicate:
            return None
        if predicate(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address passed to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000516
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from a set of CustomProxy objects.

    The proxies are kept per protocol in self.proxies, each protocol
    mapping to a list of CustomProxy objects in the order added.
    """

    def __init__(self, *proxies):
        self.proxies = {}
        # Register the proxies given to the constructor; they used to
        # be discarded silently.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        type or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Request.set_proxy() needs both a host and a type;
                # calling it with the address alone raised TypeError.
                # NOTE(review): the request's own type is passed on the
                # assumption that the proxy speaks the same protocol --
                # confirm.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        # p is not used; the request is simply opened by the parent.
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register the CustomProxy cpo under its protocol."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000541
class HTTPPasswordMgr:
    """Store (user, password) pairs per realm and per group of URIs.

    self.passwd maps a realm to a dictionary whose keys are tuples of
    reduced URIs (see reduce_uri) and whose values are
    (user, password) tuples.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Record user and passwd for realm at uri."""
        # uri could be a single URI or a sequence
        if isinstance(uri, types.StringType):
            uri = [uri]
        reduced = tuple([self.reduce_uri(u) for u in uri])
        self.passwd.setdefault(realm, {})[reduced] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for realm and authuri.

        The result is (None, None) when nothing matches.
        """
        target = self.reduce_uri(authuri)
        for uris, authinfo in self.passwd.get(realm, {}).items():
            for candidate in uris:
                if self.is_suburi(candidate, target):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        netloc, path = parts[1], parts[2]
        if not netloc:
            # A bare netloc is parsed as a path; treat it as the host.
            return path, '/'
        return netloc, path or '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        # test is below base when the path of base is a prefix of the
        # path of test.
        shared = posixpath.commonprefix((base[1], test[1]))
        if len(shared) != len(base[1]):
            return 0
        return 1
Tim Peterse1190062001-01-15 03:34:38 +0000585
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000586
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first, then the entries stored for realm None."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
594
595
class AbstractBasicAuthHandler:
    """Shared implementation of basic authentication.

    Subclasses supply the 'header' attribute (the name of the request
    header that carries the credentials) and the http_error_NNN entry
    point that calls http_error_auth_reqed().
    """

    # Matches the scheme and the quoted realm at the start of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.__current_realm = None
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  must be careful to set it to None on successful
        # return.

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if the challenge asks for Basic.

        authreq is the name of the response header holding the
        challenge.  Returns None unless a retry was made.
        """
        # XXX could be multiple headers
        challenge = headers.get(authreq, None)
        if not challenge:
            return None
        mo = AbstractBasicAuthHandler.rx.match(challenge)
        if not mo:
            return None
        scheme, realm = mo.groups()
        if scheme.lower() != 'basic':
            return None
        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with the credentials stored for realm and host.

        Returns None when an earlier attempt is still pending (the
        credentials were refused) or when no password is stored.
        """
        retrying = self.__current_realm is not None
        self.__current_realm = realm
        if retrying:
            return None
        user, pw = self.passwd.find_user_password(realm, host)
        if not pw:
            self.__current_realm = None
            return None
        raw = "%s:%s" % (user, pw)
        auth = base64.encodestring(raw).strip()
        req.add_header(self.header, 'Basic %s' % auth)
        resp = self.parent.open(req)
        self.__current_realm = None
        return resp
642
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with basic authentication."""

    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Credentials are looked up by the netloc of the requested URL.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
651
652
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with basic authentication."""

    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # Credentials are looked up by the host of the request.
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
661
662
663class AbstractDigestAuthHandler:
664
665 def __init__(self, passwd=None):
666 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000667 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000668 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000669 self.add_password = self.passwd.add_password
670 self.__current_realm = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000671
Moshe Zadka8a18e992001-03-01 08:40:42 +0000672 def http_error_auth_reqed(self, authreq, host, req, headers):
673 authreq = headers.get(self.header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000674 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000675 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000676 if kind == 'Digest':
677 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000678
679 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000680 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000681 chal = parse_keqv_list(parse_http_list(challenge))
682 auth = self.get_authorization(req, chal)
683 if auth:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000684 req.add_header(self.header, 'Digest %s' % auth)
Fred Drake13a2c272000-02-10 17:17:14 +0000685 resp = self.parent.open(req)
686 self.__current_realm = None
687 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000688
def get_authorization(self, req, chal):
    """Build the credentials that follow 'Digest ' in the auth header.

    *chal* maps the challenge's parameter names to their values.
    Returns the credentials string, or None when the challenge has no
    realm or nonce, when a realm was already recorded by an earlier
    call, when the digest algorithm is unavailable, or when no user is
    known for the realm and URL.
    """
    try:
        realm = chal['realm']
        nonce = chal['nonce']
    except KeyError:
        return None
    algorithm = chal.get('algorithm', 'MD5')
    # mod_digest doesn't send an opaque, even though it isn't
    # supposed to be optional
    opaque = chal.get('opaque', None)

    # The realm is recorded on every call, but credentials are only
    # produced when no realm was recorded beforehand.
    already_recorded = self.__current_realm is not None
    self.__current_realm = realm
    if already_recorded:
        return None

    H, KD = self.get_algorithm_impls(algorithm)
    if H is None:
        return None

    user, pw = self.passwd.find_user_password(realm, req.get_full_url())
    if user is None:
        return None

    # XXX not implemented yet
    entdig = None
    if req.has_data():
        entdig = self.get_entity_digest(req.get_data(), chal)

    A1 = "%s:%s:%s" % (user, realm, pw)
    if req.has_data():
        method = 'POST'
    else:
        method = 'GET'
    # XXX selector: what about proxies and full urls
    A2 = "%s:%s" % (method, req.get_selector())
    respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
    # XXX should the partial digests be encoded too?

    fields = ['username="%s"' % (user,),
              'realm="%s"' % (realm,),
              'nonce="%s"' % (nonce,),
              'uri="%s"' % (req.get_selector(),),
              'response="%s"' % (respdig,)]
    if opaque:
        fields.append('opaque="%s"' % opaque)
    if entdig:
        fields.append('digest="%s"' % entdig)
    if algorithm != 'MD5':
        fields.append('algorithm="%s"' % algorithm)
    return ', '.join(fields)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000738
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) function pair for the digest *algorithm*.

    H maps a string to its hex-encoded hash.  KD(secret, data) is
    H("secret:data").  Only 'MD5' and 'SHA' are implemented; for any
    other name (None, None) is returned, which get_authorization()
    checks for before giving up on the challenge.
    """
    # lambdas assume digest modules are imported at the top level
    if algorithm == 'MD5':
        H = lambda x, e=encode_digest: e(md5.new(x).digest())
    elif algorithm == 'SHA':
        H = lambda x, e=encode_digest: e(sha.new(x).digest())
    else:
        # XXX MD5-sess
        # Without this branch H stays unbound and building KD below
        # raises NameError instead of reporting "unsupported".
        return None, None
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000748
def get_entity_digest(self, data, chal):
    """Return the entity digest for *data*; currently always None.

    XXX not implemented yet
    """
    return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000752
Moshe Zadka8a18e992001-03-01 08:40:42 +0000753
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Request header that carries the credentials.
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 response by retrying with Digest credentials.

        The host is the network-location part of the request's full
        URL.  Whatever http_error_auth_reqed() produces is returned;
        dropping it would throw away the response of the retried
        request.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
766
767
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP status 407)."""

    # Request header that carries the credentials.
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 response by retrying with Digest credentials.

        Whatever http_error_auth_reqed() produces is returned; dropping
        it would throw away the response of the retried request.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
775
776
def encode_digest(digest):
    """Return *digest* as lowercase hex, two digits per character."""
    # Only the low eight bits of each character are encoded, as a
    # high nibble followed by a low nibble.
    return ''.join(['%02x' % (ord(ch) & 0xff) for ch in digest])
Tim Peterse1190062001-01-15 03:34:38 +0000785
786
Moshe Zadka8a18e992001-03-01 08:40:42 +0000787class AbstractHTTPHandler(BaseHandler):
788
789 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000790 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000791 if not host:
792 raise URLError('no host given')
793
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000794 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000795 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000796 if req.has_data():
797 data = req.get_data()
798 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000799 if not req.headers.has_key('Content-type'):
800 h.putheader('Content-type',
801 'application/x-www-form-urlencoded')
802 if not req.headers.has_key('Content-length'):
803 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000804 else:
805 h.putrequest('GET', req.get_selector())
806 except socket.error, err:
807 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000808
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000809 h.putheader('Host', host)
810 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000811 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000812 for k, v in req.headers.items():
813 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000814 h.endheaders()
815 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000816 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000817
818 code, msg, hdrs = h.getreply()
819 fp = h.getfile()
820 if code == 200:
821 return addinfourl(fp, hdrs, req.get_full_url())
822 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000823 return self.parent.error('http', req, fp, code, msg, hdrs)
824
Moshe Zadka8a18e992001-03-01 08:40:42 +0000825
class HTTPHandler(AbstractHTTPHandler):
    """Opens http URLs by running do_open() with httplib.HTTP."""

    def http_open(self, req):
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
830
831
# The handler is only defined when this httplib provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Opens https URLs by running do_open() with httplib.HTTPS."""

        def https_open(self, req):
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
837
838
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open() always raises URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the type reported by the request."""
        raise URLError('unknown url type: %s' % req.get_type())
843
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value; one pair of
    surrounding double quotes is removed from a value.  An element
    without '=' raises ValueError.  If a key does repeat, the last
    value wins.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than v[0]/v[-1], so that an empty value
        # ('key=') is kept as '' instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
853
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns the list of elements with surrounding whitespace
    stripped.  Raises ValueError when a quoted-string is still open,
    a comma follows, and no closing quote remains.
    """
    # XXX this function could probably use more testing

    items = []
    end = len(s)
    i = 0           # position from which the scan continues
    inquote = 0     # true while inside a quoted-string
    start = 0       # position where the current element begins
    while i < end:
        c = s.find(',', i)
        q = s.find('"', i)
        if c == -1:
            # No more separators: the rest is the last element.
            items.append(s[start:])
            break
        if q == -1:
            if inquote:
                raise ValueError("unbalanced quotes")
            items.append(s[start:c])
            i = c + 1
            # The next element begins after this comma.  Leaving
            # start behind here made every later element include
            # the ones before it.
            start = i
            continue
        if inquote:
            if q < c:
                # The closing quote comes before the comma, so the
                # comma really ends the element.
                items.append(s[start:c])
                i = c + 1
                start = i
                inquote = 0
            else:
                # The comma is inside the quoted-string; move up to
                # the closing quote and look again from there.
                i = q
        else:
            if c < q:
                items.append(s[start:c])
                i = c + 1
                start = i
            else:
                inquote = 1
                i = q + 1
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000899
class FileHandler(BaseHandler):
    """Opens file URLs, handing host-qualified ones to the ftp handlers."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* locally, or re-dispatch it as an ftp request.

        A selector that starts with exactly two slashes names a host,
        so the request's type is changed to 'ftp' and it is re-opened
        through the parent opener.
        """
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses considered local.

        The pair (address of 'localhost', address of this machine's
        host name) is looked up once and kept on the class.
        """
        if FileHandler.names is None:
            FileHandler.names = (socket.gethostbyname('localhost'),
                                 socket.gethostbyname(socket.gethostname()))
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The headers carry Content-Type (guessed from the selector,
        'text/plain' when unknown), Content-Length and Last-modified.
        Raises URLError when the request names a host that is not this
        machine or that carries a port.  Errors from os.stat() and
        open() propagate unchanged.
        """
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        # A single stat supplies both the size and the modification
        # time; a second, identical call whose result was never used
        # has been dropped.
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if host:
            host, port = splitport(host)
        # port is only bound when a host was given; the "not host"
        # test short-circuits before it is read otherwise.
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+selector)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000938
939class FTPHandler(BaseHandler):
940 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000941 host = req.get_host()
942 if not host:
943 raise IOError, ('ftp error', 'no host given')
944 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000945 try:
946 host = socket.gethostbyname(host)
947 except socket.error, msg:
948 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000949 host, port = splitport(host)
950 if port is None:
951 port = ftplib.FTP_PORT
952 path, attrs = splitattr(req.get_selector())
953 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000954 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000955 dirs, file = dirs[:-1], dirs[-1]
956 if dirs and not dirs[0]:
957 dirs = dirs[1:]
958 user = passwd = '' # XXX
959 try:
960 fw = self.connect_ftp(user, passwd, host, port, dirs)
961 type = file and 'I' or 'D'
962 for attr in attrs:
963 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000964 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000965 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000966 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000967 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +0000968 headers = ""
969 mtype = mimetypes.guess_type(req.get_full_url())[0]
970 if mtype:
971 headers += "Content-Type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +0000972 if retrlen is not None and retrlen >= 0:
Guido van Rossum833a8d82001-08-24 13:10:13 +0000973 headers += "Content-Length: %d\n" % retrlen
974 sf = StringIO(headers)
975 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +0000976 return addinfourl(fp, headers, req.get_full_url())
977 except ftplib.all_errors, msg:
978 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000979
980 def connect_ftp(self, user, passwd, host, port, dirs):
981 fw = ftpwrapper(user, passwd, host, port, dirs)
982## fw.ftp.set_debuglevel(1)
983 return fw
984
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse.

    Connections are keyed by (user, passwd, host, port).  Each use
    pushes the entry's expiry time to now + delay; expired entries are
    closed and dropped by check_cache().
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # earliest expiry time seen by check_cache
        self.delay = 60         # seconds added to "now" on each use
        self.max_conns = 16     # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds an unused connection is kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached connection for this login, creating it if needed."""
        key = user, passwd, host, port
        if not self.cache.has_key(key):
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        # Hold on to the wrapper: check_cache() may drop this very
        # entry (for example with max_conns == 1), and looking it up
        # again afterwards would raise KeyError.
        fw = self.cache[key]
        self.check_cache()
        return fw

    def check_cache(self):
        """Close expired connections, then evict one if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            self._update_soonest()

        # then check the size
        # NOTE(review): the entry removed here is not closed, and the
        # test is "==" rather than ">="; both are left as they were.
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute self.soonest from the remaining expiry times."""
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            # min() of an empty sequence raises ValueError; fall back
            # to the initial value so the next check rescans.
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001030
class GopherHandler(BaseHandler):
    """Opens gopher URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) of *req* to its host.

        Returns an addinfourl with empty headers.  Raises GopherError
        when the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type prefix is split off and not used further.
        gtype, selector = splitgophertype(req.get_selector())
        selector, query = splitquery(selector)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001047
1048#bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder for OpenerDirector objects.

    Only proxy_handlers is consulted by build_opener(); the
    default_handlers, handlers and replacement_handlers lists are
    recorded but not yet installed.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Add *ph* to this factory's proxy handlers."""
        # A new list is bound on the instance, so the class-level
        # list is never mutated.
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Add *h* to this factory's handlers."""
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented; does nothing."""
        pass

    def build_opener(self):
        """Return an OpenerDirector holding the proxy handlers.

        Entries that are classic classes are instantiated first;
        anything else is added as given.
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if isinstance(ph, types.ClassType):
                ph = ph()
            opener.add_handler(ph)
        # The opener was built and then dropped; hand it back.
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001073
# Manual smoke test: fetch a fixed list of URLs with urlopen() and print
# either the number of bytes read or the error that was raised.
if __name__ == "__main__":
    # XXX some of the test code depends on machine configurations that
    # are internal to CNRI.   Need to set up a public server with the
    # right authentication configuration for test purposes.
    if socket.gethostname() == 'bitdiddle':
        localhost = 'bitdiddle.cnri.reston.va.us'
    elif socket.gethostname() == 'bitdiddle.concentric.net':
        localhost = 'localhost'
    else:
        localhost = None
    # An entry is either a URL string or a (url, data) tuple; the loop
    # at the bottom passes the second item as the data argument.
    urls = [
        # Thanks to Fred for finding these!
        'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
        'gopher://gopher.vt.edu:10010/10/33',

        'file:/etc/passwd',
        'file://nonsensename/etc/passwd',
        'ftp://www.python.org/pub/tmp/httplib.py',
        'ftp://www.python.org/pub/tmp/imageop.c',
        'ftp://www.python.org/pub/tmp/blat',
        'http://www.espn.com/', # redirect
        'http://www.python.org/Spanish/Inquistion/',
        ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
         'query=pythonistas&querytype=simple&casefold=yes&req=search'),
        'http://www.python.org/',
        'ftp://prep.ai.mit.edu/welcome.msg',
        'ftp://www.python.org/pub/tmp/figure.prn',
        'ftp://www.python.org/pub/tmp/interp.pl',
        'http://checkproxy.cnri.reston.va.us/test/test.html',
        ]

    # Extra URLs and credentials that only make sense on the two known
    # test machines.
    if localhost is not None:
        urls = urls + [
            'file://%s/etc/passwd' % localhost,
            'http://%s/simple/' % localhost,
            'http://%s/digest/' % localhost,
            'http://%s/not/found.h' % localhost,
            ]

        bauth = HTTPBasicAuthHandler()
        bauth.add_password('basic_test_realm', localhost, 'jhylton',
                           'password')
        dauth = HTTPDigestAuthHandler()
        dauth.add_password('digest_test_realm', localhost, 'jhylton',
                           'password')


    cfh = CacheFTPHandler()
    cfh.setTimeout(1)

    # XXX try out some custom proxy objects too!
    def at_cnri(req):
        # Returns 1 for hosts under .cnri.reston.va.us and None (by
        # falling off the end) for everything else.
        host = req.get_host()
        print host
        if host[-18:] == '.cnri.reston.va.us':
            return 1
    p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
    ph = CustomProxyHandler(p)

    # With this line disabled, none of the handlers built above is
    # installed here.
    #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))

    for url in urls:
        if isinstance(url, types.TupleType):
            url, req = url
        else:
            req = None
        print url
        try:
            f = urlopen(url, req)
        except IOError, err:
            print "IOError:", err
        except socket.error, err:
            print "socket.error:", err
        else:
            buf = f.read()
            f.close()
            print "read %d bytes" % len(buf)
        print
        time.sleep(0.1)