blob: 3c2c1048a35ff319ceb02d9ca5979fc33dc9e9ed [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as in the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  It accepts one or more Handlers
as arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# what the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000092import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import re
94import base64
95import types
96import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000097import md5
98import mimetypes
99import mimetools
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000100import rfc822
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000101import ftplib
102import sys
103import time
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000104import os
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000106import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000107
108try:
109 from cStringIO import StringIO
110except ImportError:
111 from StringIO import StringIO
112
113try:
114 import sha
115except ImportError:
116 # need 1.5.2 final
117 sha = None
118
119# not sure how many of these need to be gotten rid of
120from urllib import unwrap, unquote, splittype, splithost, \
121 addinfourl, splitport, splitgophertype, splitquery, \
122 splitattr, ftpwrapper, noheaders
123
124# support for proxies via environment variables
125from urllib import getproxies
126
127# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000128from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000129
130__version__ = "2.0a1"
131
# Module-wide default opener; created lazily by urlopen() or set
# explicitly by install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    url and data are passed unchanged to the opener's open() method.
    If no opener has been installed yet, one is created with
    build_opener() and remembered for later calls.
    """
    global _opener
    if _opener is None:
        _opener = build_opener()
    return _opener.open(url, data)
138
def install_opener(opener):
    """Make opener the module-wide default opener used by urlopen()."""
    global _opener
    _opener = opener
142
143# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000144# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000145 # subtypes.
146
class URLError(IOError):
    """Error raised by this module; the cause is stored in self.reason."""
    # URLError is a sub-type of IOError, but it doesn't share any of
    # the implementation.  need to override __init__ and __str__
    def __init__(self, reason):
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple so that a tuple-valued reason is
        # formatted as a whole instead of being unpacked by '%'.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000155
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        """Store the error details and initialise the addinfourl part.

        fp, hdrs and url are handed to addinfourl.__init__; code, msg,
        hdrs and fp are also kept as attributes of the same name.
        """
        self.__super_init(fp, hdrs, url)
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # XXX
        self.filename = url

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        if self.fp:
            self.fp.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000177
class GopherError(URLError):
    """URLError subclass for gopher-specific failures."""
    pass
180
Moshe Zadka8a18e992001-03-01 08:40:42 +0000181
class Request:
    """Encapsulate the state of one request: URL, optional data, headers.

    The URL is split lazily: get_type() computes the scheme and
    get_host() the host, each caching the remainder of the URL in a
    private __r_* attribute used by the next step.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        # Copy the mapping so neither the caller's dict nor the shared
        # default argument is ever modified by add_header().
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                # Read the instance dict directly: a second getattr()
                # would re-enter __getattr__ and recurse without end
                # if the getter did not store the attribute.
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError if the URL has none."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, computing it on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy.

        host and type replace the request's own; the selector becomes
        the full original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000243
class OpenerDirector:
    """Composite object that dispatches requests to registered handlers.

    handle_open maps a protocol name to the list of handlers having a
    '<protocol>_open' method; handle_error maps a protocol name to a
    dict from error kind to the list of handlers having a
    '<protocol>_error_<kind>' method.
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every protocol/error method it defines.

        The handler is appended to self.handlers and given this opener
        as parent only if at least one of its attribute names matched.
        """
        added = 0
        for meth in dir(handler):
            if meth[-5:] == '_open':
                protocol = meth[:-5]
                self.handle_open.setdefault(protocol, []).append(handler)
                added = 1
                continue
            # Look for '<proto>_error_<kind>': i and j are the positions
            # of the first and second underscore.
            i = meth.find('_')
            j = meth[i+1:].find('_') + i + 1
            if j != -1 and meth[i+1:j] == 'error':
                proto = meth[:i]
                kind = meth[j+1:]
                try:
                    # numeric kinds (HTTP status codes) are stored as ints
                    kind = int(kind)
                except ValueError:
                    pass
                by_kind = self.handle_error.setdefault(proto, {})
                by_kind.setdefault(kind, []).append(handler)
                added = 1
                continue
        if added:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name on each handler in chain[kind] until one answers.

        Returns the first result that is not None, or None if no
        handler produced one.
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the result.

        The 'default' handlers are tried first, then the handlers for
        the request's URL type, then the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, (types.StringType, types.UnicodeType)):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        type_ = req.get_type()
        result = self._call_chain(self.handle_open, type_, type_ + \
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for it.

        For 'http' and 'https' the third extra argument is taken as the
        status code and selects 'http_error_<code>'; if no handler
        answers, 'http_error_default' is tried with the same arguments.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%d' % proto
            http_err = 1
            orig_args = args
        else:
            # NOTE(review): handle_error[proto] is a dict keyed by error
            # kind, so _call_chain would iterate kinds rather than
            # handlers on this path -- verify before relying on it.
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000347 return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000348
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000349# XXX probably also want an abstract factory that knows things like
350 # the fact that a ProxyHandler needs to get inserted first.
351# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000352 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000353
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)
    skip = []
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                overrides = issubclass(check, klass)
            else:
                overrides = isinstance(check, klass)
            if overrides:
                # Record each default at most once: listing it twice
                # would make the second remove() below raise ValueError.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
390
class BaseHandler:
    """Base class for handlers; keeps a reference to the owning opener."""

    def add_parent(self, parent):
        # called by OpenerDirector.add_handler() when the handler is registered
        self.parent = parent

    def close(self):
        # drop the reference to the opener
        self.parent = None
396
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turn the response into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000400
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects, refusing redirect loops."""
    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request at the URL named by the response.

        The target is taken from the 'location' header, else from
        'uri'; with neither, None is returned so that other handlers
        may deal with the error.  Raises HTTPError when the target was
        already visited or more than 10 redirects have been recorded.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        # the new location may be relative to the current URL
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data(), req.headers)
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               newurl in req.error_302_dict:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302

    # Prefix for the HTTPError message raised on a redirect loop.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"
440
class ProxyHandler(BaseHandler):
    """Redirect requests to the proxies given in a {scheme: url} mapping."""

    def __init__(self, proxies=None):
        """Install a '<scheme>_open' attribute for every configured proxy.

        proxies maps a URL scheme to a proxy URL; when omitted, the
        mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Bind the loop values as lambda defaults so each attribute
            # keeps its own proxy URL and scheme.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy.

        Credentials embedded in the proxy URL ('user:pass@host') are
        sent as a Basic Proxy-Authorization header.  Returns None when
        the proxy uses the same scheme as the request, otherwise the
        result of re-opening the request through the parent opener.
        The type argument is replaced by the scheme of the proxy URL.
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        host, XXX = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            user_pass = base64.encodestring(unquote(user_pass)).strip()
            req.add_header('Proxy-Authorization', 'Basic '+user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000471
472# feature suggested by Duncan Booth
473# XXX custom is not a good name
class CustomProxy:
    """Describe one proxy together with a test for when it applies."""
    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func was given and accepts req, otherwise None."""
        if self.func and self.func(req):
            return 1

    def get_proxy(self):
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000487
class CustomProxyHandler(BaseHandler):
    """Pick a proxy per request from registered CustomProxy objects."""

    def __init__(self, *proxies):
        # self.proxies maps a protocol name to a list of CustomProxy objects
        self.proxies = {}
        # register the proxies given to the constructor (they used to
        # be silently discarded)
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        type or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Request.set_proxy() needs both host and type.
                # NOTE(review): the request's own type is passed on the
                # assumption that the proxy speaks the same protocol --
                # confirm against intended use.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register the CustomProxy cpo under its protocol."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000512
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI."""

    def __init__(self):
        # {realm: {tuple of reduced URIs: (user, passwd)}}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm at one URI or a sequence of URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, (types.StringType, types.UnicodeType)):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if realm not in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, passwd) for realm and authuri, or (None, None)."""
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # a bare netloc is parsed as a path
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        # Compare whole path segments: a plain character prefix would
        # let '/foo' match '/foobar'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        if test[1].startswith(prefix):
            return 1
        return 0
Tim Peterse1190062001-01-15 03:34:38 +0000556
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000557
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; if no user is found, retry with realm None."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is not None:
            return user, password
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
565
566
class AbstractBasicAuthHandler:
    """Shared logic for Basic authentication against servers and proxies.

    Subclasses provide auth_header, the name of the request header
    that carries the credentials, and a parent opener.
    """

    # matches '<scheme> realm="<realm>"' at the start of a challenge
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq names the response header holding the challenge.
        Returns None when the header is absent, does not parse, or
        names a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with credentials for realm/host, if any are known.

        Returns None when no password is stored, or when the request
        already carried exactly these credentials (retrying would fail
        the same way).
        """
        user,pw = self.passwd.find_user_password(realm, host)
        # compare against None: an empty password is still a password
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = 'Basic %s' % base64.encodestring(raw).strip()
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
602
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # credentials are looked up by the network location of the URL
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          host, req, headers)
611
612
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic proxy credentials."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # credentials are looked up by the request's host
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          host, req, headers)
621
622
623class AbstractDigestAuthHandler:
624
625 def __init__(self, passwd=None):
626 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000627 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000628 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000629 self.add_password = self.passwd.add_password
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000630
Moshe Zadka8a18e992001-03-01 08:40:42 +0000631 def http_error_auth_reqed(self, authreq, host, req, headers):
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000632 authreq = headers.get(self.auth_header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000633 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000634 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000635 if kind == 'Digest':
636 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000637
638 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000639 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000640 chal = parse_keqv_list(parse_http_list(challenge))
641 auth = self.get_authorization(req, chal)
642 if auth:
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000643 auth_val = 'Digest %s' % auth
644 if req.headers.get(self.auth_header, None) == auth_val:
645 return None
646 req.add_header(self.auth_header, auth_val)
Fred Drake13a2c272000-02-10 17:17:14 +0000647 resp = self.parent.open(req)
Fred Drake13a2c272000-02-10 17:17:14 +0000648 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000649
650 def get_authorization(self, req, chal):
Fred Drake13a2c272000-02-10 17:17:14 +0000651 try:
652 realm = chal['realm']
653 nonce = chal['nonce']
654 algorithm = chal.get('algorithm', 'MD5')
655 # mod_digest doesn't send an opaque, even though it isn't
656 # supposed to be optional
657 opaque = chal.get('opaque', None)
658 except KeyError:
659 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000660
Fred Drake13a2c272000-02-10 17:17:14 +0000661 H, KD = self.get_algorithm_impls(algorithm)
662 if H is None:
663 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000664
Fred Drake13a2c272000-02-10 17:17:14 +0000665 user, pw = self.passwd.find_user_password(realm,
Tim Peterse1190062001-01-15 03:34:38 +0000666 req.get_full_url())
Fred Drake13a2c272000-02-10 17:17:14 +0000667 if user is None:
668 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000669
Fred Drake13a2c272000-02-10 17:17:14 +0000670 # XXX not implemented yet
671 if req.has_data():
672 entdig = self.get_entity_digest(req.get_data(), chal)
673 else:
674 entdig = None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000675
Fred Drake13a2c272000-02-10 17:17:14 +0000676 A1 = "%s:%s:%s" % (user, realm, pw)
677 A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
678 # XXX selector: what about proxies and full urls
679 req.get_selector())
680 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
681 # XXX should the partial digests be encoded too?
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000682
Fred Drake13a2c272000-02-10 17:17:14 +0000683 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
684 'response="%s"' % (user, realm, nonce, req.get_selector(),
685 respdig)
686 if opaque:
687 base = base + ', opaque="%s"' % opaque
688 if entdig:
689 base = base + ', digest="%s"' % entdig
690 if algorithm != 'MD5':
691 base = base + ', algorithm="%s"' % algorithm
692 return base
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000693
694 def get_algorithm_impls(self, algorithm):
Fred Drake13a2c272000-02-10 17:17:14 +0000695 # lambdas assume digest modules are imported at the top level
696 if algorithm == 'MD5':
697 H = lambda x, e=encode_digest:e(md5.new(x).digest())
698 elif algorithm == 'SHA':
699 H = lambda x, e=encode_digest:e(sha.new(x).digest())
700 # XXX MD5-sess
701 KD = lambda s, d, H=H: H("%s:%s" % (s, d))
702 return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000703
704 def get_entity_digest(self, data, chal):
Fred Drake13a2c272000-02-10 17:17:14 +0000705 # XXX not implemented yet
706 return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000707
Moshe Zadka8a18e992001-03-01 08:40:42 +0000708
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # Header name made available to AbstractDigestAuthHandler, which is
    # defined outside this part of the file.
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 reply by delegating to http_error_auth_reqed.

        The host is the network-location part of the request's full
        URL.  The delegate's result is returned so that it is handed
        back to whoever dispatched the error; previously it was
        computed and then thrown away.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
721
722
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication handler for 407 (proxy) responses."""

    # Header name made available to AbstractDigestAuthHandler, which is
    # defined outside this part of the file.
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 reply by delegating to http_error_auth_reqed.

        The host is taken from req.get_host().  The delegate's result
        is returned so that it is handed back to whoever dispatched
        the error; previously it was computed and then thrown away.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
730
731
def encode_digest(digest):
    """Return *digest* rendered as a lowercase hexadecimal string.

    Each character contributes two hex digits: bits 4-7 of its
    ordinal first, then bits 0-3.
    """
    pairs = []
    for ch in digest:
        value = ord(ch)
        pairs.append('%x%x' % ((value >> 4) & 0xf, value & 0xf))
    return ''.join(pairs)
Tim Peterse1190062001-01-15 03:34:38 +0000740
741
Moshe Zadka8a18e992001-03-01 08:40:42 +0000742class AbstractHTTPHandler(BaseHandler):
743
744 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000745 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000746 if not host:
747 raise URLError('no host given')
748
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000749 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000750 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000751 if req.has_data():
752 data = req.get_data()
753 h.putrequest('POST', req.get_selector())
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000754 if not req.headers.has_key('Content-type'):
755 h.putheader('Content-type',
756 'application/x-www-form-urlencoded')
757 if not req.headers.has_key('Content-length'):
758 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000759 else:
760 h.putrequest('GET', req.get_selector())
761 except socket.error, err:
762 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000763
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000764 h.putheader('Host', host)
765 for args in self.parent.addheaders:
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000766 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000767 for k, v in req.headers.items():
768 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000769 h.endheaders()
770 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000771 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000772
773 code, msg, hdrs = h.getreply()
774 fp = h.getfile()
775 if code == 200:
776 return addinfourl(fp, hdrs, req.get_full_url())
777 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000778 return self.parent.error('http', req, fp, code, msg, hdrs)
779
Moshe Zadka8a18e992001-03-01 08:40:42 +0000780
class HTTPHandler(AbstractHTTPHandler):
    """Handler whose http_open sends requests with httplib.HTTP."""

    def http_open(self, req):
        """Open *req* via do_open using the httplib.HTTP class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
785
786
# Only defined when this httplib build provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler whose https_open sends requests with httplib.HTTPS."""

        def https_open(self, req):
            """Open *req* via do_open using the httplib.HTTPS class."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
792
793
class UnknownHandler(BaseHandler):
    """Handler that rejects every request it is given."""

    def unknown_open(self, req):
        """Raise URLError naming the URL type reported by *req*."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
798
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value wrapped in
    double quotes has the quotes removed.  Returns a dictionary
    mapping keys to values; if a key does repeat, the last value wins.

    Raises ValueError (from the tuple unpacking) for an element that
    contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than v[0] / v[-1], so that an empty value
        # ("key=") is accepted instead of raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
808
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns the elements as a list of strings with surrounding
    whitespace stripped; quotes are kept.  Raises ValueError when a
    comma is found inside a quoted-string that is never closed.
    """
    # XXX this function could probably use more testing

    items = []
    end = len(s)
    i = 0        # position the scan resumes from
    start = 0    # first character of the element being collected
    inquote = 0
    while i < end:
        comma = s.find(',', i)
        quote = s.find('"', i)
        if comma == -1:
            # no separator left: the remainder is the last element
            items.append(s[start:])
            break
        if quote == -1:
            if inquote:
                raise ValueError("unbalanced quotes")
            items.append(s[start:comma])
            # Advance start together with i.  Leaving start behind
            # made every later element begin at the old position,
            # e.g. "a, b, c" came out as ['a', 'a, b', 'a, b, c'].
            i = start = comma + 1
            continue
        if inquote:
            if quote < comma:
                # the quoted-string closes before the comma, so the
                # comma ends this element
                items.append(s[start:comma])
                i = start = comma + 1
                inquote = 0
            else:
                # the comma belongs to the quoted-string; move to the
                # closing quote and look again from there
                i = quote
        else:
            if comma < quote:
                items.append(s[start:comma])
                i = start = comma + 1
            else:
                inquote = 1
                i = quote + 1
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000854
class FileHandler(BaseHandler):
    """Open file: URLs, using a local file or FTP depending on the URL."""

    # Addresses treated as the local host.  Filled in on first use by
    # get_names() and stored on the class.
    names = None

    def file_open(self, req):
        """Open *req* locally, or re-dispatch it as an ftp request.

        A selector that starts with '//' followed by something other
        than '/' is retyped as 'ftp' and handed back to self.parent.
        """
        selector = req.get_selector()
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    def get_names(self):
        """Return the pair of addresses accepted as the local host."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own_address = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return the local file named by *req* wrapped in addinfourl.

        Content-Type, Content-Length and Last-modified headers are
        built from the guessed MIME type and the file's stat data.
        The file is opened only when the request has no host, or its
        host carries no port and resolves to one of get_names();
        otherwise URLError is raised.  os.stat() runs before that
        check, so its errors propagate first.
        """
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = rfc822.formatdate(stats.st_mtime)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # 'port' is only looked at when host is non-empty, i.e. after
        # the splitport() call above has bound it.
        is_local = not host or \
            (not port and socket.gethostbyname(host) in self.get_names())
        if not is_local:
            raise URLError('file not on local host')
        return addinfourl(open(localfile, 'rb'),
                          headers, 'file:' + selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000892
893class FTPHandler(BaseHandler):
894 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000895 host = req.get_host()
896 if not host:
897 raise IOError, ('ftp error', 'no host given')
898 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000899 try:
900 host = socket.gethostbyname(host)
901 except socket.error, msg:
902 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000903 host, port = splitport(host)
904 if port is None:
905 port = ftplib.FTP_PORT
906 path, attrs = splitattr(req.get_selector())
907 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000908 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000909 dirs, file = dirs[:-1], dirs[-1]
910 if dirs and not dirs[0]:
911 dirs = dirs[1:]
912 user = passwd = '' # XXX
913 try:
914 fw = self.connect_ftp(user, passwd, host, port, dirs)
915 type = file and 'I' or 'D'
916 for attr in attrs:
917 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000918 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000919 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000920 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000921 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +0000922 headers = ""
923 mtype = mimetypes.guess_type(req.get_full_url())[0]
924 if mtype:
925 headers += "Content-Type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +0000926 if retrlen is not None and retrlen >= 0:
Guido van Rossum833a8d82001-08-24 13:10:13 +0000927 headers += "Content-Length: %d\n" % retrlen
928 sf = StringIO(headers)
929 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +0000930 return addinfourl(fp, headers, req.get_full_url())
931 except ftplib.all_errors, msg:
932 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000933
934 def connect_ftp(self, user, passwd, host, port, dirs):
935 fw = ftpwrapper(user, passwd, host, port, dirs)
936## fw.ftp.set_debuglevel(1)
937 return fw
938
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps its ftpwrapper objects for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> time after which the entry is stale
        self.soonest = 0     # earliest value in self.timeout (0 if empty)
        self.delay = 60      # seconds added to "now" on every use
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds an entry stays fresh after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which check_cache evicts an entry."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for the arguments, creating one if needed.

        Every call pushes the entry's expiry time forward by
        self.delay and then runs check_cache().
        """
        # The wrapper is constructed with *dirs*, so one built for a
        # given directory list must not be handed out for another:
        # the directory path is part of the key.
        key = user, passwd, host, port, '/'.join(dirs)
        if not self.cache.has_key(key):
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        # Hold the wrapper before check_cache() runs, in case the
        # check removes this very entry from the cache.
        fw = self.cache[key]
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return fw

    def check_cache(self):
        """Close and drop stale entries, then enforce the size limit."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            # items() returns a list here, so deleting while looping
            # over it is safe
            for k, v in self.timeout.items():
                if v < now:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._earliest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._earliest_expiry()

    def _earliest_expiry(self):
        """Return the smallest expiry time, or 0 when nothing is cached."""
        # min() of an empty sequence raises ValueError
        if not self.timeout:
            return 0
        return min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000984
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) of *req* to its host.

        Raises GopherError when the request has no host.  The reply
        is returned wrapped in addinfourl with noheaders().
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # the gopher type prefix is split off and not used further
        gopher_type, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001001
1002#bleck! don't use this yet
class OpenerFactory:
    """Experimental builder of OpenerDirector objects.

    Only the proxy handlers are installed by build_opener() so far;
    default_handlers, handlers and replacement_handlers are recorded
    but not yet consulted.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Append *ph* to this factory's proxy handlers."""
        # builds a new list on the instance; the class-level list is
        # left untouched
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Append *h* to this factory's handlers."""
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented yet; does nothing."""
        pass

    def build_opener(self):
        """Return a new OpenerDirector with the proxy handlers added.

        Entries that are classes are instantiated first.  The opener
        is now returned; it used to be built and then discarded.
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if inspect.isclass(ph):
                ph = ph()
            opener.add_handler(ph)
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001027
1028if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001029 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001030 # are internal to CNRI. Need to set up a public server with the
1031 # right authentication configuration for test purposes.
1032 if socket.gethostname() == 'bitdiddle':
1033 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001034 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001035 localhost = 'localhost'
1036 else:
1037 localhost = None
1038 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001039 # Thanks to Fred for finding these!
1040 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1041 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001042
Fred Drake13a2c272000-02-10 17:17:14 +00001043 'file:/etc/passwd',
1044 'file://nonsensename/etc/passwd',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001045 'ftp://www.python.org/pub/python/misc/sousa.au',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001046 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001047 'http://www.espn.com/', # redirect
1048 'http://www.python.org/Spanish/Inquistion/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001049 ('http://www.python.org/cgi-bin/faqw.py',
Fred Drake13a2c272000-02-10 17:17:14 +00001050 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1051 'http://www.python.org/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001052 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001053 ]
1054
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001055## if localhost is not None:
1056## urls = urls + [
1057## 'file://%s/etc/passwd' % localhost,
1058## 'http://%s/simple/' % localhost,
1059## 'http://%s/digest/' % localhost,
1060## 'http://%s/not/found.h' % localhost,
1061## ]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001062
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001063## bauth = HTTPBasicAuthHandler()
1064## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1065## 'password')
1066## dauth = HTTPDigestAuthHandler()
1067## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1068## 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001069
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001070
1071 cfh = CacheFTPHandler()
1072 cfh.setTimeout(1)
1073
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001074## # XXX try out some custom proxy objects too!
1075## def at_cnri(req):
1076## host = req.get_host()
1077## print host
1078## if host[-18:] == '.cnri.reston.va.us':
1079## return 1
1080## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1081## ph = CustomProxyHandler(p)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001082
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001083## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1084 install_opener(build_opener(cfh, GopherHandler))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001085
1086 for url in urls:
Jeremy Hyltond5d8fc52001-08-11 21:44:46 +00001087 if isinstance(url, types.TupleType):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001088 url, req = url
1089 else:
1090 req = None
1091 print url
1092 try:
1093 f = urlopen(url, req)
1094 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001095 print "IOError:", err
1096 except socket.error, err:
1097 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001098 else:
1099 buf = f.read()
1100 f.close()
1101 print "read %d bytes" % len(buf)
1102 print
1103 time.sleep(0.1)