blob: a1866c6f5adeb5e0213cb5bb0ec9938a5c5e4ad1 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
Tim Peterse1190062001-01-15 03:34:38 +000019get a file-like object back. One difference is that you can also pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000020a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# what the problem was, e.g., that it didn't recognize the hash
# algorithm requested in the challenge, it would be good to pass that
# information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000092import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import re
94import base64
95import types
96import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000097import md5
98import mimetypes
99import mimetools
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000100import rfc822
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000101import ftplib
102import sys
103import time
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000104import os
105import stat
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000106import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000107import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000108
109try:
110 from cStringIO import StringIO
111except ImportError:
112 from StringIO import StringIO
113
114try:
115 import sha
116except ImportError:
117 # need 1.5.2 final
118 sha = None
119
120# not sure how many of these need to be gotten rid of
121from urllib import unwrap, unquote, splittype, splithost, \
122 addinfourl, splitport, splitgophertype, splitquery, \
123 splitattr, ftpwrapper, noheaders
124
125# support for proxies via environment variables
126from urllib import getproxies
127
128# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000129from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000130
131__version__ = "2.0a1"
132
# Module-wide default opener; created lazily by urlopen() or supplied
# explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    url and data are passed unchanged to the opener's open() method.
    If no opener has been installed yet, one is created with
    build_opener() and remembered for later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)

def install_opener(opener):
    """Make opener the default opener used by urlopen()."""
    global _opener
    _opener = opener
143
144# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000145# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000146 # subtypes.
147
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of the
    implementation: IOError.__init__ is deliberately not called.  The
    single constructor argument is stored as the ``reason`` attribute
    and rendered by __str__.
    """

    def __init__(self, reason):
        # Populate args by hand because the base initialiser is skipped;
        # otherwise the reason is missing from repr() and from pickles.
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000156
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    The instance is initialised through addinfourl.__init__ with the
    response file object, headers and URL, and additionally records the
    status code and message.

    Attributes set here: code, msg, hdrs, fp and filename (the URL).
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.__super_init(fp, hdrs, url)
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        #
        # Look the attribute up defensively: if __init__ failed before
        # fp was assigned, the finalizer must not raise AttributeError.
        fp = getattr(self, 'fp', None)
        if fp:
            fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000178
class GopherError(URLError):
    """URLError subclass with no behaviour of its own.

    Presumably raised by the gopher handler elsewhere in this module.
    """
181
Moshe Zadka8a18e992001-03-01 08:40:42 +0000182
class Request:
    """Encapsulate the state of one URL request.

    A Request holds the (unwrapped) URL, optional data and a private
    copy of the headers.  The URL is split lazily: get_type() splits off
    the scheme, get_host() splits off the host, and get_selector()
    returns whatever follows the host.  The intermediate pieces are kept
    in the private attributes __r_type and __r_host; __getattr__ fills
    them in on demand so the getters may be called in any order.
    """

    def __init__(self, url, data=None, headers=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        # Always copy so the caller's mapping is never shared with the
        # request; None (the default) means "no extra headers".
        self.headers = {}
        if headers is not None:
            self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                getattr(self, getter)()
                # Read the instance dict directly: if the getter did not
                # create the attribute, a plain getattr() would re-enter
                # __getattr__ and recurse without bound.
                try:
                    return self.__dict__[attr]
                except KeyError:
                    pass
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach data to the request, replacing any previous value."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached (i.e. is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the attached data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, splitting it off on first use.

        Raises ValueError if the URL has no recognisable scheme.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return the part of the URL that follows the host."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy.

        host and type replace the request's own host and scheme, and the
        selector becomes the complete original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        """Set header key to val (useful for something like authentication)."""
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000244
class OpenerDirector:
    """Dispatch requests and errors to chains of registered handlers.

    Handlers are indexed by the names of their methods:

      <protocol>_open        -> self.handle_open[protocol]
      <proto>_error_<kind>   -> self.handle_error[proto][kind]
                                (kind is converted to int when possible)

    Each index entry is a list of handlers in registration order.
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every protocol/error hook it defines.

        The handler is added to self.handlers and told about its parent
        only if at least one of its attribute names matched a hook
        pattern.
        """
        added = 0
        for meth in dir(handler):
            if meth[-5:] == '_open':
                protocol = meth[:-5]
                self.handle_open.setdefault(protocol, []).append(handler)
                added = 1
                continue
            # Match "<proto>_error_<kind>": three pieces when split on
            # the first two underscores, with "error" in the middle.
            parts = meth.split('_', 2)
            if len(parts) == 3 and parts[1] == 'error':
                proto, kind = parts[0], parts[2]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                by_kind = self.handle_error.setdefault(proto, {})
                by_kind.setdefault(kind, []).append(handler)
                added = 1
        if added:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them all."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind].

        Returns the first result that is not None, or None if every
        handler declined (or none is registered for kind).
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl, which is a URL string or a Request object.

        The 'default' chain is tried first, then the chain for the
        request's own scheme, and finally the 'unknown' chain.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, (types.StringType, types.UnicodeType)):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        type_ = req.get_type()
        result = self._call_chain(self.handle_open, type_, type_ + \
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error for protocol proto to the error handlers.

        For 'http' and 'https' the third positional argument is taken as
        the status code: handlers for http_error_<code> are tried first
        and http_error_default is the fallback.  Returns None when no
        handler produced a result.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special cased
            # https is not different than http.  Use .get() so that an
            # opener without any HTTP error handler returns None instead
            # of raising KeyError.
            chain = self.handle_error.get('http', {})
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%d' % proto
            http_err = 1
            orig_args = args
        else:
            # NOTE(review): handle_error maps proto -> {kind: handlers},
            # so chain.get(proto) yields a dict here, not a handler
            # list, and add_handler() never registers a plain
            # "<proto>_error" method.  This branch looks unfinished.
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (chain, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (chain, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000349
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000350# XXX probably also want an abstract factory that knows things like
351 # the fact that a ProxyHandler needs to get inserted first.
352# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000353 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000354
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.  Returns the new OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)
    skip = []
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                overrides = issubclass(check, klass)
            else:
                overrides = isinstance(check, klass)
            if overrides:
                # Record each default once only: a second entry would
                # make default_classes.remove() raise ValueError below
                # when two arguments override the same default.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
391
class BaseHandler:
    """Minimal handler base: keeps a reference to the owning opener."""

    def add_parent(self, parent):
        """Record parent (the object registering this handler)."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
397
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turns the response into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the response."""
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000401
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302 redirects by opening the new location."""

    # Prefix of the HTTPError message raised when a redirect loop is
    # detected.  (The trailing space after "would" was missing, which
    # produced "wouldlead" in the rendered message.)
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request at the URL named by the response.

        The target is taken from the 'location' header, else 'uri'; if
        neither is present the handler declines by returning None.
        Raises HTTPError when the target was already visited or more
        than 10 URLs have been recorded for this redirect chain.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               req.error_302_dict.has_key(newurl):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
441
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per scheme."""

    def __init__(self, proxies=None):
        """Install a <scheme>_open hook for every entry of proxies.

        proxies maps a scheme to a proxy URL; when omitted, the mapping
        returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Bind the loop values as defaults so each hook keeps its
            # own proxy URL and scheme.
            hook = lambda r, proxy=proxy_url, type=scheme, \
                   meth=self.proxy_open: meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, hook)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy and decide who opens it.

        Returns None when the proxy speaks the request's own scheme, so
        the remaining handlers carry on; otherwise the request is
        re-opened from scratch through the parent.
        """
        orig_type = req.get_type()
        # The scheme of the proxy URL itself replaces the passed-in type.
        type, r_type = splittype(proxy)
        host = splithost(r_type)[0]
        if '@' in host:
            user_pass, host = host.split('@', 1)
            user_pass = base64.encodestring(unquote(user_pass)).strip()
            req.add_header('Proxy-Authorization', 'Basic '+user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type != type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000472
473# feature suggested by Duncan Booth
474# XXX custom is not a good name
class CustomProxy:
    """Describe one proxy: its protocol, a match predicate and an address.

    Either pass a function to the constructor or override handle().
    """

    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if the predicate exists and accepts req, else None."""
        predicate = self.func
        if not predicate:
            return None
        if predicate(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000488
class CustomProxyHandler(BaseHandler):
    """Route requests through CustomProxy objects grouped by protocol."""

    def __init__(self, *proxies):
        """Register every CustomProxy passed in.

        (The arguments used to be accepted and silently discarded.)
        """
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Re-open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        protocol or none of them accepts the request.
        """
        proto = req.get_type()
        for p in self.proxies.get(proto, ()):
            if p.handle(req):
                # set_proxy() requires both a host and a type; the old
                # one-argument call always raised TypeError.
                # NOTE(review): CustomProxy carries no scheme of its
                # own, so the request's scheme is assumed -- confirm.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Open req through the parent; p is not used."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Add cpo to the list of proxies for its protocol."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000513
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, passwd)},
    where a reduced URI is the (netloc, path) pair produced by
    reduce_uri().
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm at uri."""
        # uri could be a single URI or a sequence
        if isinstance(uri, (types.StringType, types.UnicodeType)):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if realm not in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) registered for realm that covers authuri.

        Returns (None, None) when nothing matches.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        # Compare whole path segments.  A character-wise common prefix
        # would wrongly treat '/foobar' as lying below '/foo' and hand
        # out credentials for an unrelated path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        if test[1].startswith(prefix):
            return 1
        return 0
Tim Peterse1190062001-01-15 03:34:38 +0000557
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000558
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; if no user is found, retry with realm None."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
566
567
class AbstractBasicAuthHandler:
    """Mixin that answers a Basic authentication challenge.

    Subclasses provide a ``header`` class attribute naming the request
    header that carries the credentials; self.parent is used to
    re-issue the request.
    """

    # Matches '<scheme> realm="<realm>"' at the start of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.__current_realm = None
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  must be careful to set it to None on successful
        # return.

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if the challenge asks for Basic.

        authreq names the response header holding the challenge.
        Returns the retry's result, or None when the header is absent,
        does not parse, or names another scheme.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Add a Basic credentials header to req and open it again.

        Returns None when called re-entrantly (the credentials were
        already sent and refused) or when no password is known for
        (realm, host).
        """
        if self.__current_realm is not None:
            # Second challenge while our retry is still in flight.
            self.__current_realm = realm
            return None
        self.__current_realm = realm
        try:
            user, pw = self.passwd.find_user_password(realm, host)
            # Compare against None: an empty password is still a
            # password and must be sent.
            if pw is None:
                return None
            raw = "%s:%s" % (user, pw)
            auth = base64.encodestring(raw).strip()
            req.add_header(self.header, 'Basic %s' % auth)
            return self.parent.open(req)
        finally:
            # Always clear the flag.  Previously an exception from
            # open() (e.g. the HTTPError of a refused retry) left it
            # set, and every later challenge was then ignored.
            self.__current_realm = None
614
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using Basic authentication."""

    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry with credentials for the network location of the URL."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
623
624
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using Basic authentication."""

    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry with credentials looked up for the request's host."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
633
634
635class AbstractDigestAuthHandler:
636
637 def __init__(self, passwd=None):
638 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000639 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000640 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000641 self.add_password = self.passwd.add_password
642 self.__current_realm = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000643
Moshe Zadka8a18e992001-03-01 08:40:42 +0000644 def http_error_auth_reqed(self, authreq, host, req, headers):
645 authreq = headers.get(self.header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000646 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000647 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000648 if kind == 'Digest':
649 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000650
651 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000652 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000653 chal = parse_keqv_list(parse_http_list(challenge))
654 auth = self.get_authorization(req, chal)
655 if auth:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000656 req.add_header(self.header, 'Digest %s' % auth)
Fred Drake13a2c272000-02-10 17:17:14 +0000657 resp = self.parent.open(req)
658 self.__current_realm = None
659 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000660
661 def get_authorization(self, req, chal):
Fred Drake13a2c272000-02-10 17:17:14 +0000662 try:
663 realm = chal['realm']
664 nonce = chal['nonce']
665 algorithm = chal.get('algorithm', 'MD5')
666 # mod_digest doesn't send an opaque, even though it isn't
667 # supposed to be optional
668 opaque = chal.get('opaque', None)
669 except KeyError:
670 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000671
Fred Drake13a2c272000-02-10 17:17:14 +0000672 if self.__current_realm is None:
673 self.__current_realm = realm
674 else:
675 self.__current_realm = realm
676 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000677
Fred Drake13a2c272000-02-10 17:17:14 +0000678 H, KD = self.get_algorithm_impls(algorithm)
679 if H is None:
680 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000681
Fred Drake13a2c272000-02-10 17:17:14 +0000682 user, pw = self.passwd.find_user_password(realm,
Tim Peterse1190062001-01-15 03:34:38 +0000683 req.get_full_url())
Fred Drake13a2c272000-02-10 17:17:14 +0000684 if user is None:
685 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000686
Fred Drake13a2c272000-02-10 17:17:14 +0000687 # XXX not implemented yet
688 if req.has_data():
689 entdig = self.get_entity_digest(req.get_data(), chal)
690 else:
691 entdig = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000692
Fred Drake13a2c272000-02-10 17:17:14 +0000693 A1 = "%s:%s:%s" % (user, realm, pw)
694 A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
695 # XXX selector: what about proxies and full urls
696 req.get_selector())
697 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
698 # XXX should the partial digests be encoded too?
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000699
Fred Drake13a2c272000-02-10 17:17:14 +0000700 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
701 'response="%s"' % (user, realm, nonce, req.get_selector(),
702 respdig)
703 if opaque:
704 base = base + ', opaque="%s"' % opaque
705 if entdig:
706 base = base + ', digest="%s"' % entdig
707 if algorithm != 'MD5':
708 base = base + ', algorithm="%s"' % algorithm
709 return base
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000710
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) pair of digest functions for *algorithm*.

    H(x) hashes the string x and returns the hex-encoded digest;
    KD(s, d) is H applied to "s:d".  Only 'MD5' and 'SHA' are
    implemented.  For any other algorithm (None, None) is returned, so
    a caller that tests ``H is None`` can decline the challenge instead
    of hitting an unbound local while KD is being built.
    """
    # lambdas assume digest modules are imported at the top level
    if algorithm == 'MD5':
        H = lambda x, e=encode_digest: e(md5.new(x).digest())
    elif algorithm == 'SHA':
        H = lambda x, e=encode_digest: e(sha.new(x).digest())
    else:
        # XXX MD5-sess
        return None, None
    # H is bound as a default so the lambda does not rely on nested scopes
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000720
def get_entity_digest(self, data, chal):
    """Placeholder for the entity digest; both arguments are ignored."""
    # XXX not implemented yet
    entity_digest = None
    return entity_digest
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000724
Moshe Zadka8a18e992001-03-01 08:40:42 +0000725
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 reply by delegating to http_error_auth_reqed.

        The host passed on is the network-location component of the
        request's full URL.  The delegate's result is returned to the
        caller instead of being discarded.
        NOTE(review): presumably that result is the retried response or
        None -- confirm against http_error_auth_reqed.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
738
739
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication handler that uses the proxy header names."""

    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 reply by delegating to http_error_auth_reqed.

        The host passed on is the request's host.  The delegate's result
        is returned to the caller instead of being discarded.
        NOTE(review): presumably that result is the retried response or
        None -- confirm against http_error_auth_reqed.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
747
748
def encode_digest(digest):
    """Return *digest* as lowercase hex, two digits per character.

    For each character the high nibble comes first, then the low nibble;
    only the low eight bits of the character's ordinal are used.
    """
    # divmod(byte, 16) is (high nibble, low nibble)
    return ''.join(['%x%x' % divmod(ord(ch) & 0xff, 16) for ch in digest])
Tim Peterse1190062001-01-15 03:34:38 +0000757
758
Moshe Zadka8a18e992001-03-01 08:40:42 +0000759class AbstractHTTPHandler(BaseHandler):
760
761 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000762 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000763 if not host:
764 raise URLError('no host given')
765
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000766 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000767 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000768 if req.has_data():
769 data = req.get_data()
770 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000771 if not req.headers.has_key('Content-type'):
772 h.putheader('Content-type',
773 'application/x-www-form-urlencoded')
774 if not req.headers.has_key('Content-length'):
775 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000776 else:
777 h.putrequest('GET', req.get_selector())
778 except socket.error, err:
779 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000780
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000781 h.putheader('Host', host)
782 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000783 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000784 for k, v in req.headers.items():
785 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000786 h.endheaders()
787 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000788 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000789
790 code, msg, hdrs = h.getreply()
791 fp = h.getfile()
792 if code == 200:
793 return addinfourl(fp, hdrs, req.get_full_url())
794 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000795 return self.parent.error('http', req, fp, code, msg, hdrs)
796
Moshe Zadka8a18e992001-03-01 08:40:42 +0000797
class HTTPHandler(AbstractHTTPHandler):
    """Handler whose http_open runs do_open with httplib.HTTP."""

    def http_open(self, req):
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
802
803
# HTTPSHandler exists only when this httplib build provides HTTPS.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler whose https_open runs do_open with httplib.HTTPS."""

        def https_open(self, req):
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
809
810
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open always fails with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the type reported by the request."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
815
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value wrapped in double
    quotes has the quotes removed.  An empty value (``key=``) is kept as
    the empty string.  Returns a dictionary mapping keys to values.
    Raises ValueError for an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes: v may be empty, and v[0] would then
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
825
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns a list of the elements with surrounding whitespace stripped;
    the double quotes themselves are kept.  Raises ValueError when a
    quoted-string is open, a comma follows, and no closing quote remains.
    """
    # XXX this function could probably use more testing

    items = []
    end = len(s)
    i = 0           # current scan position in s
    inquote = 0     # true while inside a quoted-string
    start = 0       # index where the element being collected begins
    while i < end:
        cur = s[i:]
        c = cur.find(',')
        q = cur.find('"')
        if c == -1:
            # no more separators: the rest is the final element
            items.append(s[start:])
            break
        if q == -1:
            if inquote:
                raise ValueError("unbalanced quotes")
            items.append(s[start:i+c])
            i = i + c + 1
            # the next element begins after the comma; without this every
            # later element would still start at the previous one's start
            start = i
            continue
        if inquote:
            if q < c:
                # closing quote comes before the comma: element is complete
                items.append(s[start:i+c])
                i = i + c + 1
                start = i
                inquote = 0
            else:
                # comma is inside the quotes: jump to the closing quote
                i = i + q
        else:
            if c < q:
                items.append(s[start:i+c])
                i = i + c + 1
                start = i
            else:
                inquote = 1
                i = i + q + 1
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000871
class FileHandler(BaseHandler):
    """Handler for file: URLs."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* locally, or re-dispatch it as ftp.

        A selector that starts with exactly two slashes has the request's
        type switched to 'ftp' and is reopened through the parent opener.
        Anything else goes to open_local_file.
        """
        url = req.get_selector()
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of 'localhost' and of this host's name.

        The tuple is resolved once and cached on the FileHandler class.
        """
        if FileHandler.names is None:
            FileHandler.names = (socket.gethostbyname('localhost'),
                                 socket.gethostbyname(socket.gethostname()))
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        Content-Type, Content-Length and Last-modified headers are built
        from the guessed MIME type and a single os.stat() of the file.
        The file is opened only when the request has no host, or has a
        host without a port that resolves to one of get_names().
        Otherwise URLError is raised.
        """
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        # one stat call supplies both the size and the modification time
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(file)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if host:
            host, port = splitport(host)
        # port is only bound when host was non-empty; "not host" is tested
        # first so it is never read otherwise
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+file)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000910
911class FTPHandler(BaseHandler):
912 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000913 host = req.get_host()
914 if not host:
915 raise IOError, ('ftp error', 'no host given')
916 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000917 try:
918 host = socket.gethostbyname(host)
919 except socket.error, msg:
920 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000921 host, port = splitport(host)
922 if port is None:
923 port = ftplib.FTP_PORT
924 path, attrs = splitattr(req.get_selector())
925 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000926 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000927 dirs, file = dirs[:-1], dirs[-1]
928 if dirs and not dirs[0]:
929 dirs = dirs[1:]
930 user = passwd = '' # XXX
931 try:
932 fw = self.connect_ftp(user, passwd, host, port, dirs)
933 type = file and 'I' or 'D'
934 for attr in attrs:
935 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000936 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000937 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000938 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000939 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +0000940 headers = ""
941 mtype = mimetypes.guess_type(req.get_full_url())[0]
942 if mtype:
943 headers += "Content-Type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +0000944 if retrlen is not None and retrlen >= 0:
Guido van Rossum833a8d82001-08-24 13:10:13 +0000945 headers += "Content-Length: %d\n" % retrlen
946 sf = StringIO(headers)
947 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +0000948 return addinfourl(fp, headers, req.get_full_url())
949 except ftplib.all_errors, msg:
950 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000951
952 def connect_ftp(self, user, passwd, host, port, dirs):
953 fw = ftpwrapper(user, passwd, host, port, dirs)
954## fw.ftp.set_debuglevel(1)
955 return fw
956
class CacheFTPHandler(FTPHandler):
    """FTPHandler that reuses ftpwrapper connections between requests.

    Connections are keyed by (user, passwd, host, port).  Each has an
    expiry time of "now + delay" that is refreshed on every use.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> expiry time (time.time() based)
        self.soonest = 0        # smallest value in self.timeout
        self.delay = 60         # seconds added to "now" for an expiry time
        self.max_conns = 16     # cache size at which one entry is evicted

    def setTimeout(self, t):
        """Set the delay, in seconds, used for new expiry times."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size that triggers eviction."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached connection for this login/host/port.

        A new ftpwrapper is created when none is cached.  The entry's
        expiry time is refreshed either way, then the cache is pruned.
        """
        key = user, passwd, host, port
        if not self.cache.has_key(key):
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close and drop expired connections, then enforce the size limit.

        Every connection removed here is closed first, including the one
        evicted because the cache reached max_conns.
        """
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # iterate over a copy: entries are deleted inside the loop
            for k, v in list(self.timeout.items()):
                if v < t:
                    self._discard(k)
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    self._discard(k)
                    break
            self._update_soonest()

    def _discard(self, key):
        # Close the connection stored under key and forget it.
        self.cache[key].close()
        del self.cache[key]
        del self.timeout[key]

    def _update_soonest(self):
        # Recompute the earliest expiry; 0 when nothing is cached, since
        # min() of an empty list would raise ValueError.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001002
class GopherHandler(BaseHandler):
    """Handler for gopher: URLs, built on gopherlib."""

    def gopher_open(self, req):
        """Send the request's selector (and query, if any) to its host.

        Raises GopherError when the request has no host.  The reply is
        wrapped in an addinfourl with no headers.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # the gopher type is split off the selector but not used further
        gtype, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001019
1020#bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder for OpenerDirector objects.

    Only the proxy handlers are installed by build_opener; the
    default_handlers, handlers and replacement_handlers lists are
    recorded but not yet used.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Add *ph* (a handler class or instance) to the proxy handlers."""
        # builds a new list so the class-level default is not mutated
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Add *h* to this factory's handlers."""
        # builds a new list so the class-level default is not mutated
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented yet; does nothing."""
        pass

    def build_opener(self):
        """Return a new OpenerDirector with the proxy handlers installed.

        Handler classes are instantiated with no arguments; instances are
        added as they are.
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if inspect.isclass(ph):
                ph = ph()
            opener.add_handler(ph)
        # hand the opener back; it was previously built and then dropped
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001045
1046if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001047 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001048 # are internal to CNRI. Need to set up a public server with the
1049 # right authentication configuration for test purposes.
1050 if socket.gethostname() == 'bitdiddle':
1051 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001052 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001053 localhost = 'localhost'
1054 else:
1055 localhost = None
1056 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001057 # Thanks to Fred for finding these!
1058 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1059 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001060
Fred Drake13a2c272000-02-10 17:17:14 +00001061 'file:/etc/passwd',
1062 'file://nonsensename/etc/passwd',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001063 'ftp://www.python.org/pub/python/misc/sousa.au',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001064 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001065 'http://www.espn.com/', # redirect
1066 'http://www.python.org/Spanish/Inquistion/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001067 ('http://www.python.org/cgi-bin/faqw.py',
Fred Drake13a2c272000-02-10 17:17:14 +00001068 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1069 'http://www.python.org/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001070 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001071 ]
1072
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001073## if localhost is not None:
1074## urls = urls + [
1075## 'file://%s/etc/passwd' % localhost,
1076## 'http://%s/simple/' % localhost,
1077## 'http://%s/digest/' % localhost,
1078## 'http://%s/not/found.h' % localhost,
1079## ]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001080
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001081## bauth = HTTPBasicAuthHandler()
1082## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1083## 'password')
1084## dauth = HTTPDigestAuthHandler()
1085## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1086## 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001087
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001088
1089 cfh = CacheFTPHandler()
1090 cfh.setTimeout(1)
1091
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001092## # XXX try out some custom proxy objects too!
1093## def at_cnri(req):
1094## host = req.get_host()
1095## print host
1096## if host[-18:] == '.cnri.reston.va.us':
1097## return 1
1098## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1099## ph = CustomProxyHandler(p)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001100
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001101## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1102 install_opener(build_opener(cfh, GopherHandler))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001103
1104 for url in urls:
Jeremy Hyltond5d8fc52001-08-11 21:44:46 +00001105 if isinstance(url, types.TupleType):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001106 url, req = url
1107 else:
1108 req = None
1109 print url
1110 try:
1111 f = urlopen(url, req)
1112 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001113 print "IOError:", err
1114 except socket.error, err:
1115 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001116 else:
1117 buf = f.read()
1118 f.close()
1119 print "read %d bytes" % len(buf)
1120 print
1121 time.sleep(0.1)