blob: 814a2df7a162598915c7bce1c2021b1dd91a05da [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.

urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.

build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  It accepts one or more Handlers
as arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.

install_opener -- installs a new opener as the default opener.

objects of interest:
OpenerDirector --

Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.

BaseHandler --

exceptions:
URLError -- a subclass of IOError; individual protocols have their own
specific subclass

HTTPError -- also a valid HTTP response, so you can treat an HTTP error
as an exceptional event or a valid response

internals:
BaseHandler and parent
_call_chain conventions

Example usage:

import urllib2

# set up authentication info
authinfo = urllib2.HTTPBasicAuthHandler()
authinfo.add_password('realm', 'host', 'username', 'password')

proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)

# install it
urllib2.install_opener(opener)

f = urllib2.urlopen('http://www.python.org/')


"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# that was requested in the challenge, it would be good to pass that
# information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000092import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import re
94import base64
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000095import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000096import md5
97import mimetypes
98import mimetools
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +000099import rfc822
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000100import ftplib
101import sys
102import time
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000103import os
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000105import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000106
107try:
108 from cStringIO import StringIO
109except ImportError:
110 from StringIO import StringIO
111
112try:
113 import sha
114except ImportError:
115 # need 1.5.2 final
116 sha = None
117
118# not sure how many of these need to be gotten rid of
119from urllib import unwrap, unquote, splittype, splithost, \
120 addinfourl, splitport, splitgophertype, splitquery, \
121 splitattr, ftpwrapper, noheaders
122
123# support for proxies via environment variables
124from urllib import getproxies
125
126# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000127from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000128
129__version__ = "2.0a1"
130
# The module-wide default opener; created on first use by urlopen() or
# replaced explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    If no default opener exists yet, one is created with build_opener()
    and remembered for later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        # First call without an installed opener: build the default one.
        opener = _opener = build_opener()
    return opener.open(url, data)
137
def install_opener(opener):
    """Make opener the module-wide default that urlopen() will use."""
    global _opener
    _opener = opener
141
142# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000143# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000144 # subtypes.
145
class URLError(IOError):
    """Error raised by this module; the cause is kept in ``reason``.

    URLError is a sub-type of IOError, but it doesn't share any of
    the implementation: __init__ and __str__ are overridden.
    """

    def __init__(self, reason):
        # IOError.__init__ is not called, so fill in args by hand;
        # otherwise the exception would carry an empty args tuple.
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple so that a reason which is itself a
        # tuple is formatted instead of being taken as format arguments.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000154
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    The instance stores the request URL (as ``filename``), the status
    ``code``, the message ``msg``, the response headers ``hdrs`` and the
    response file object ``fp``.
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # Set the plain attributes first so that __str__ and __del__ work
        # even if initialising the addinfourl base fails.
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url
        # The addinfourl base class depends on fp being a usable file
        # object.  An HTTPError may be created without one; in that case
        # skip the base initialiser rather than fail while reporting.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        # getattr() guards against instances whose __init__ never ran.
        fp = getattr(self, 'fp', None)
        if fp:
            fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000176
class GopherError(URLError):
    """URLError subclass for gopher errors; adds no behaviour of its own."""
179
Moshe Zadka8a18e992001-03-01 08:40:42 +0000180
class Request:
    """Hold the state of one request: URL, optional data and headers.

    The URL is split lazily: get_type() and get_host() parse it on first
    use and cache what they found, and __getattr__ triggers the matching
    get_*() method if one of the cached remainders is read too early.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        # Filled in on demand by get_type() / get_host() or by set_proxy().
        # self.__r_type is what's left after doing the splittype
        self.type = None
        self.host = None
        self.port = None
        self.data = data
        # Copy the mapping so the caller's dict (and the shared default)
        # is never modified by add_header().
        own_headers = {}
        own_headers.update(headers)
        self.headers = own_headers

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                # Running the getter is expected to set the attribute.
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach data to the request, replacing any earlier data."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached to the request."""
        return self.data is not None

    def get_data(self):
        """Return the attached data (None if there is none)."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL type (scheme); raise ValueError if there is none."""
        if self.type is not None:
            return self.type
        self.type, self.__r_type = splittype(self.__original)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the (unquoted) host part of the URL."""
        if self.host is None:
            host, rest = splithost(self.__r_type)
            self.host = host
            self.__r_host = rest
            if host:
                self.host = unquote(host)
        return self.host

    def get_selector(self):
        """Return what is left of the URL after the host was split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy at host speaking type.

        The selector becomes the complete original URL.
        """
        self.host = host
        self.type = type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000242
class OpenerDirector:
    """Dispatch requests to the handlers registered with add_handler().

    Handlers are looked up by method name: a method called
    ``<protocol>_open`` registers the handler for opening that protocol,
    and a method called ``<protocol>_error_<kind>`` registers it for
    that error kind of the protocol.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every protocol/error its methods name.

        The handler is kept (and told about its parent) only if at least
        one of its attribute names matched.
        """
        registered = 0
        for meth in dir(handler):
            if meth.endswith('_open'):
                self.handle_open.setdefault(meth[:-5], []).append(handler)
                registered = 1
                continue
            # Look for <proto>_error_<kind>; <kind> keeps any further '_'.
            pieces = meth.split('_', 2)
            if len(pieces) != 3 or pieces[1] != 'error':
                continue
            proto, kind = pieces[0], pieces[2]
            try:
                # Numeric kinds (e.g. HTTP status codes) are keyed as ints.
                kind = int(kind)
            except ValueError:
                pass
            lookup = self.handle_error.get(proto, {})
            lookup.setdefault(kind, []).append(handler)
            self.handle_error[proto] = lookup
            registered = 1
        if registered:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them all."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome
        return None

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the result.

        'default' handlers are tried first, then the handlers for the
        request's own type, and finally the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        response = self._call_chain(self.handle_open, 'default',
                                    'default_open', req)
        if response:
            return response

        scheme = req.get_type()
        response = self._call_chain(self.handle_open, scheme,
                                    scheme + '_open', req)
        if response:
            return response

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Hand an error for proto to the registered error handlers.

        For 'http' and 'https' the third positional argument is used as
        the error code; if no handler for that code produces a result,
        the 'default' HTTP error handlers are tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # https is not different than http
            chain = self.handle_error['http']
            code = args[2] # YUCK!
            meth_name = 'http_error_%d' % code
            outcome = self._call_chain(chain, code, meth_name, *args)
            if outcome:
                return outcome
            return self._call_chain(chain, 'default',
                                    'http_error_default', *args)

        outcome = self._call_chain(self.handle_error, proto,
                                   proto + '_error', *args)
        if outcome:
            return outcome
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000347
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000348# XXX probably also want an abstract factory that knows things like
349 # the fact that a ProxyHandler needs to get inserted first.
350# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000351 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000352
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.  The default handlers are added
    first, followed by the arguments in the order given.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    for klass in default_classes:
        # Decide per default class whether any argument replaces it.
        # Deciding once per class (instead of collecting a removal list)
        # keeps two arguments that override the same default from
        # triggering a second, failing list.remove().
        replaced = 0
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    replaced = 1
            elif isinstance(check, klass):
                replaced = 1
            if replaced:
                break
        if not replaced:
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
389
class BaseHandler:
    """Base class for handlers; tracks the object the handler belongs to."""

    def add_parent(self, parent):
        """Remember parent as the owner of this handler."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
395
class HTTPDefaultErrorHandler(BaseHandler):
    """Turn any HTTP error that reaches it into an HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the response."""
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000399
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects by re-opening the new location."""

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Open the redirect target named in the response headers.

        Returns None when the response names no target.  Raises
        HTTPError when the target was already visited or more than ten
        redirects have been recorded on the request.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # The target may be relative to the URL that was requested.
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data(), req.headers)
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               newurl in req.error_302_dict:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            # Carry the history of visited URLs over to the new request.
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302

    # Note the trailing space after "would": the pieces are joined by
    # implicit string concatenation.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"
439
class ProxyHandler(BaseHandler):
    """Redirect requests for the configured URL types to their proxies.

    For every ``type -> proxy URL`` entry of the mapping, a
    ``<type>_open`` attribute is created on the instance so that the
    handler is picked up for that type.
    """

    def __init__(self, proxies=None):
        """Use proxies (a mapping), or getproxies() when none is given."""
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Bind the loop values as lambda defaults so every generated
            # opener keeps its own proxy URL and type.
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy; re-open it if the proxy uses another type.

        Credentials embedded in the proxy URL (``user:pass@host``) are
        sent as a Basic Proxy-Authorization header.  Note that the type
        argument is replaced by the type parsed from the proxy URL.
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        host, rest = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            # encodestring() breaks long output into lines; a header
            # value must not contain newlines, so remove all of them
            # (strip() alone would only drop the trailing one).
            encoded = base64.encodestring(unquote(user_pass))
            user_pass = encoded.replace('\n', '')
            req.add_header('Proxy-Authorization', 'Basic '+user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000470
471# feature suggested by Duncan Booth
472# XXX custom is not a good name
class CustomProxy:
    """Pair a protocol with a proxy address and an optional predicate."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if a predicate was given and accepts req, else None."""
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000486
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from registered CustomProxy objects."""

    def __init__(self, *proxies):
        """Register every CustomProxy passed to the constructor."""
        # Maps a protocol name to the list of CustomProxy objects for it.
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Re-open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's type
        or none of them accepts the request.
        """
        proto = req.get_type()
        for p in self.proxies.get(proto, ()):
            if p.handle(req):
                # Request.set_proxy() needs the proxy's type as well as
                # its address.  A CustomProxy only carries an address, so
                # the request's own type is used.
                # NOTE(review): assumes the proxy speaks the same
                # protocol as the request -- confirm.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its protocol, after any earlier entries."""
        if cpo.proto in self.proxies:
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000511
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm and one or more URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if realm not in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for authuri in realm.

        Returns (None, None) when nothing registered covers authuri.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # A bare netloc is parsed as a path; treat it as the host.
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a character-wise common prefix
        # would wrongly treat '/ab' as lying below '/a'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000555
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000556
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look authuri up in realm first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm: try the default realm.
            return lookup(self, None, authuri)
        return user, password
564
565
class AbstractBasicAuthHandler:
    """Shared logic for retrying a request with Basic credentials.

    Subclasses provide ``auth_header``, the name of the request header
    that carries the credentials.
    """

    # Matches '<scheme> realm="<realm>"' at the start of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use password_mgr, or a fresh HTTPPasswordMgr when None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req if the authreq header carries a Basic challenge.

        Returns the result of the retry, or None when the header is
        missing, cannot be parsed or names another scheme.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with the credentials stored for realm and host.

        Returns None when no password is known, or when the request
        already carried exactly these credentials.
        """
        user,pw = self.passwd.find_user_password(realm, host)
        if pw:
            raw = "%s:%s" % (user, pw)
            # encodestring() breaks long output into lines; a header
            # value must not contain newlines, so remove all of them
            # (strip() alone would only drop the trailing one).
            encoded = base64.encodestring(raw).replace('\n', '')
            auth = 'Basic %s' % encoded
            if req.headers.get(self.auth_header, None) == auth:
                # Same credentials were already sent and refused.
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
601
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer HTTP 401 responses with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req using the challenge in the www-authenticate header."""
        # Element 1 of the parsed URL is the network location.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
610
611
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer HTTP 407 responses with Basic credentials."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req using the challenge in the proxy-authenticate header."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
620
621
622class AbstractDigestAuthHandler:
623
624 def __init__(self, passwd=None):
625 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000626 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000627 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000628 self.add_password = self.passwd.add_password
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000629
Moshe Zadka8a18e992001-03-01 08:40:42 +0000630 def http_error_auth_reqed(self, authreq, host, req, headers):
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000631 authreq = headers.get(self.auth_header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000632 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000633 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000634 if kind == 'Digest':
635 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000636
637 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000638 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000639 chal = parse_keqv_list(parse_http_list(challenge))
640 auth = self.get_authorization(req, chal)
641 if auth:
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000642 auth_val = 'Digest %s' % auth
643 if req.headers.get(self.auth_header, None) == auth_val:
644 return None
645 req.add_header(self.auth_header, auth_val)
Fred Drake13a2c272000-02-10 17:17:14 +0000646 resp = self.parent.open(req)
Fred Drake13a2c272000-02-10 17:17:14 +0000647 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000648
649 def get_authorization(self, req, chal):
Fred Drake13a2c272000-02-10 17:17:14 +0000650 try:
651 realm = chal['realm']
652 nonce = chal['nonce']
653 algorithm = chal.get('algorithm', 'MD5')
654 # mod_digest doesn't send an opaque, even though it isn't
655 # supposed to be optional
656 opaque = chal.get('opaque', None)
657 except KeyError:
658 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000659
Fred Drake13a2c272000-02-10 17:17:14 +0000660 H, KD = self.get_algorithm_impls(algorithm)
661 if H is None:
662 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000663
Fred Drake13a2c272000-02-10 17:17:14 +0000664 user, pw = self.passwd.find_user_password(realm,
Tim Peterse1190062001-01-15 03:34:38 +0000665 req.get_full_url())
Fred Drake13a2c272000-02-10 17:17:14 +0000666 if user is None:
667 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000668
Fred Drake13a2c272000-02-10 17:17:14 +0000669 # XXX not implemented yet
670 if req.has_data():
671 entdig = self.get_entity_digest(req.get_data(), chal)
672 else:
673 entdig = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000674
Fred Drake13a2c272000-02-10 17:17:14 +0000675 A1 = "%s:%s:%s" % (user, realm, pw)
676 A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
677 # XXX selector: what about proxies and full urls
678 req.get_selector())
679 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
680 # XXX should the partial digests be encoded too?
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000681
Fred Drake13a2c272000-02-10 17:17:14 +0000682 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
683 'response="%s"' % (user, realm, nonce, req.get_selector(),
684 respdig)
685 if opaque:
686 base = base + ', opaque="%s"' % opaque
687 if entdig:
688 base = base + ', digest="%s"' % entdig
689 if algorithm != 'MD5':
690 base = base + ', algorithm="%s"' % algorithm
691 return base
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000692
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) helper functions for a digest algorithm.

    H(x) hashes the string x and returns the hex-encoded digest; KD(s, d)
    is H applied to "s:d".  'MD5' and 'SHA' are supported.  For any
    other algorithm name (None, None) is returned so that the caller's
    "H is None" check can decline the challenge instead of crashing on
    an unbound name.
    """
    # lambdas assume digest modules are imported at the top level
    if algorithm == 'MD5':
        H = lambda x, e=encode_digest: e(md5.new(x).digest())
    elif algorithm == 'SHA':
        H = lambda x, e=encode_digest: e(sha.new(x).digest())
    else:
        # XXX MD5-sess
        return None, None
    # H is bound as a default argument so KD keeps working after return.
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000702
def get_entity_digest(self, data, chal):
    """Return the entity digest for a request body.

    XXX not implemented yet: both arguments are ignored and None is
    always returned.
    """
    return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000706
Moshe Zadka8a18e992001-03-01 08:40:42 +0000707
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Name of the request header used to carry the credentials.
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 response by delegating to the shared digest logic.

        The network location of the request URL is passed as the host.
        Whatever http_error_auth_reqed produces is returned to the
        caller; previously the result was dropped, so this handler
        always appeared to have declined the error.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
720
721
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP status 407)."""

    # Name of the request header used to carry the credentials.
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 response by delegating to the shared digest logic.

        The request's host is used as the authentication host.  Whatever
        http_error_auth_reqed produces is returned to the caller;
        previously the result was dropped.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
729
730
def encode_digest(digest):
    """Return the lowercase hexadecimal spelling of a raw digest string.

    Every character contributes two hex digits: the high nibble of its
    ordinal followed by the low nibble.
    """
    # Mask to one byte so only the two low nibbles are ever emitted.
    return ''.join(["%02x" % (ord(ch) & 0xff) for ch in digest])
Tim Peterse1190062001-01-15 03:34:38 +0000739
740
Moshe Zadka8a18e992001-03-01 08:40:42 +0000741class AbstractHTTPHandler(BaseHandler):
742
743 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000744 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000745 if not host:
746 raise URLError('no host given')
747
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000748 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000749 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000750 if req.has_data():
751 data = req.get_data()
752 h.putrequest('POST', req.get_selector())
Raymond Hettinger54f02222002-06-01 14:18:47 +0000753 if not 'Content-type' in req.headers:
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000754 h.putheader('Content-type',
755 'application/x-www-form-urlencoded')
Raymond Hettinger54f02222002-06-01 14:18:47 +0000756 if not 'Content-length' in req.headers:
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000757 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000758 else:
759 h.putrequest('GET', req.get_selector())
760 except socket.error, err:
761 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000762
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000763 h.putheader('Host', host)
764 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000765 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000766 for k, v in req.headers.items():
767 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000768 h.endheaders()
769 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000770 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000771
772 code, msg, hdrs = h.getreply()
773 fp = h.getfile()
774 if code == 200:
775 return addinfourl(fp, hdrs, req.get_full_url())
776 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000777 return self.parent.error('http', req, fp, code, msg, hdrs)
778
Moshe Zadka8a18e992001-03-01 08:40:42 +0000779
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http: URLs."""

    def http_open(self, req):
        """Open req through do_open using httplib.HTTP."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
784
785
# The handler is only defined when httplib exposes an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for https: URLs."""

        def https_open(self, req):
            """Open req through do_open using httplib.HTTPS."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
791
792
class UnknownHandler(BaseHandler):
    """Handler that rejects requests by raising URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
797
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value wrapped in double
    quotes has the quotes removed.  An empty value ("key=") is kept as
    the empty string rather than raising IndexError.

    Returns a dictionary mapping keys to values.  An element with no
    '=' at all still raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices, unlike v[0] / v[-1], are safe on an empty value.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
807
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma, and a backslash inside a quoted-string escapes the
    character that follows it (so an escaped double quote does not end
    the string).

    Returns the list of elements with surrounding whitespace stripped;
    quotes and backslashes are left in place.  A trailing comma does
    not produce an extra empty element.  Raises ValueError if a
    quoted-string is never closed.
    """
    items = []
    start = 0       # index where the current element begins
    inquote = 0     # true while inside a quoted-string
    escaped = 0     # true when the previous character was a backslash
    for i in range(len(s)):
        ch = s[i]
        if escaped:
            # Whatever follows a backslash is taken literally.
            escaped = 0
        elif inquote:
            if ch == '\\':
                escaped = 1
            elif ch == '"':
                inquote = 0
        elif ch == '"':
            inquote = 1
        elif ch == ',':
            # Only commas outside quotes separate elements.
            items.append(s[start:i])
            start = i + 1
    if inquote:
        raise ValueError("unbalanced quotes")
    if start < len(s):
        items.append(s[start:])
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000853
class FileHandler(BaseHandler):
    """Handler for file: URLs."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a file: URL.

        A selector of the form //host/... (two slashes, not three) is
        retyped as ftp and sent back through the parent opener;
        anything else is opened as a local file.
        """
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses that count as this machine.

        The result is computed once and cached on the class.  If the
        machine's own hostname cannot be resolved, only the address of
        'localhost' is used instead of letting socket.error escape.
        """
        if FileHandler.names is None:
            addresses = [socket.gethostbyname('localhost')]
            try:
                addresses.append(
                    socket.gethostbyname(socket.gethostname()))
            except socket.error:
                # Unresolvable hostname: fall back to localhost only.
                pass
            FileHandler.names = tuple(addresses)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the local file named by req's selector.

        Content-Type, Content-Length and Last-modified headers are built
        from the guessed MIME type (text/plain when unknown) and the
        file's stat data.  The file is served when the URL names no
        host, or names a host without a port whose address is one of
        get_names().  Otherwise URLError is raised.
        """
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = rfc822.formatdate(stats.st_mtime)
        mtype = mimetypes.guess_type(file)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if host:
            host, port = splitport(host)
        # port is only consulted when host is non-empty, i.e. after the
        # splitport() call above has bound it.
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+file)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000891
892class FTPHandler(BaseHandler):
893 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000894 host = req.get_host()
895 if not host:
896 raise IOError, ('ftp error', 'no host given')
897 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000898 try:
899 host = socket.gethostbyname(host)
900 except socket.error, msg:
901 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000902 host, port = splitport(host)
903 if port is None:
904 port = ftplib.FTP_PORT
905 path, attrs = splitattr(req.get_selector())
906 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000907 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000908 dirs, file = dirs[:-1], dirs[-1]
909 if dirs and not dirs[0]:
910 dirs = dirs[1:]
911 user = passwd = '' # XXX
912 try:
913 fw = self.connect_ftp(user, passwd, host, port, dirs)
914 type = file and 'I' or 'D'
915 for attr in attrs:
916 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000917 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000918 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000919 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000920 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +0000921 headers = ""
922 mtype = mimetypes.guess_type(req.get_full_url())[0]
923 if mtype:
924 headers += "Content-Type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +0000925 if retrlen is not None and retrlen >= 0:
Guido van Rossum833a8d82001-08-24 13:10:13 +0000926 headers += "Content-Length: %d\n" % retrlen
927 sf = StringIO(headers)
928 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +0000929 return addinfourl(fp, headers, req.get_full_url())
930 except ftplib.all_errors, msg:
931 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000932
933 def connect_ftp(self, user, passwd, host, port, dirs):
934 fw = ftpwrapper(user, passwd, host, port, dirs)
935## fw.ftp.set_debuglevel(1)
936 return fw
937
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections around for reuse.

    Each cached connection has an expiry time of "last use + delay"
    seconds.  Expired connections are closed on the next lookup, and one
    connection is dropped whenever the cache reaches max_conns entries.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # smallest value in self.timeout
        self.delay = 60         # seconds added to "now" on each use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds a connection is kept after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which a connection is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for the target, creating it if needed.

        The directory path is part of the cache key because the wrapper
        is created for specific dirs; without it a connection opened for
        one directory would be handed out for a different one.
        """
        key = user, passwd, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # Iterate over a copy: entries are deleted inside the loop.
            for k, v in list(self.timeout.items()):
                if v < t:
                    self._discard(k)
        self._refresh_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    self._discard(k)
                    break
            self._refresh_soonest()

    def _discard(self, key):
        """Close the connection stored under key and forget it."""
        self.cache[key].close()
        del self.cache[key]
        del self.timeout[key]

    def _refresh_soonest(self):
        """Recompute self.soonest; 0 when nothing is cached."""
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            # min() of an empty sequence would raise ValueError.
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000983
class GopherHandler(BaseHandler):
    """Handler for gopher: URLs."""

    def gopher_open(self, req):
        """Fetch a gopher resource and wrap it in addinfourl.

        The selector is split into gopher type, selector and optional
        query; a query is sent with gopherlib.send_query, a bare
        selector with gopherlib.send_selector.  Raises GopherError when
        the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type is split off but not used any further.
        gtype, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001000
1001#bleck! don't use this yet
class OpenerFactory:
    """Experimental builder for OpenerDirector instances.

    The class-level lists are defaults.  The add_* methods rebind the
    attribute on the instance with a new list, so the class defaults
    are never mutated.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Append a proxy handler (class or instance) for this factory."""
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Append a handler for this factory."""
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented yet; does nothing."""
        pass

    def build_opener(self):
        """Create an OpenerDirector with the proxy handlers installed.

        Proxy handlers given as classes are instantiated first.  Only
        proxy_handlers are consulted here; default_handlers and handlers
        are not installed.  The opener is returned (it used to be built
        and then discarded).
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if inspect.isclass(ph):
                ph = ph()
            opener.add_handler(ph)
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001026
1027if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001028 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001029 # are internal to CNRI. Need to set up a public server with the
1030 # right authentication configuration for test purposes.
1031 if socket.gethostname() == 'bitdiddle':
1032 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001033 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001034 localhost = 'localhost'
1035 else:
1036 localhost = None
1037 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001038 # Thanks to Fred for finding these!
1039 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1040 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001041
Fred Drake13a2c272000-02-10 17:17:14 +00001042 'file:/etc/passwd',
1043 'file://nonsensename/etc/passwd',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001044 'ftp://www.python.org/pub/python/misc/sousa.au',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001045 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001046 'http://www.espn.com/', # redirect
1047 'http://www.python.org/Spanish/Inquistion/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001048 ('http://www.python.org/cgi-bin/faqw.py',
Fred Drake13a2c272000-02-10 17:17:14 +00001049 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1050 'http://www.python.org/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001051 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001052 ]
1053
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001054## if localhost is not None:
1055## urls = urls + [
1056## 'file://%s/etc/passwd' % localhost,
1057## 'http://%s/simple/' % localhost,
1058## 'http://%s/digest/' % localhost,
1059## 'http://%s/not/found.h' % localhost,
1060## ]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001061
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001062## bauth = HTTPBasicAuthHandler()
1063## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1064## 'password')
1065## dauth = HTTPDigestAuthHandler()
1066## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1067## 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001068
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001069
1070 cfh = CacheFTPHandler()
1071 cfh.setTimeout(1)
1072
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001073## # XXX try out some custom proxy objects too!
1074## def at_cnri(req):
1075## host = req.get_host()
1076## print host
1077## if host[-18:] == '.cnri.reston.va.us':
1078## return 1
1079## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1080## ph = CustomProxyHandler(p)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001081
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001082## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1083 install_opener(build_opener(cfh, GopherHandler))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001084
1085 for url in urls:
Walter Dörwald65230a22002-06-03 15:58:32 +00001086 if isinstance(url, tuple):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001087 url, req = url
1088 else:
1089 req = None
1090 print url
1091 try:
1092 f = urlopen(url, req)
1093 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001094 print "IOError:", err
1095 except socket.error, err:
1096 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001097 else:
1098 buf = f.read()
1099 f.close()
1100 print "read %d bytes" % len(buf)
1101 print
1102 time.sleep(0.1)