blob: a3ff482b7423c71eb9c6513a14526dd5f5929fe1 [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below). It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
The OpenerDirector manages a collection of Handler objects that do
all the actual work. Each Handler implements a particular protocol or
option. The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL. For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns. The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as the original
urllib. Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back. One difference is that you can also pass
a Request instance instead of a URL. Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers. Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request. The
state can be as simple as the URL. It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
URLError -- a subclass of IOError; individual protocols have their own
specific subclasses

HTTPError -- also a valid HTTP response, so you can treat an HTTP error
as an exceptional event or as a valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Tim Peterse1190062001-01-15 03:34:38 +000060# build a new opener that adds authentication and caching FTP handlers
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000061opener = urllib2.build_opener(authinfo, urllib2.CacheFTPHandler)
62
63# install it
64urllib2.install_opener(opener)
65
66f = urllib2.urlopen('http://www.python.org/')
67
68
69"""
70
# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled? The
# client needs to know the HTTP error code. But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
78
79# XXX to do:
80# name!
81# documentation (getting there)
82# complex proxies
83# abstract factory for opener
84# ftp errors aren't handled cleanly
85# gopher can return a socket.error
86# check digest against correct (i.e. non-apache) implementation
87
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000088import socket
89import UserDict
90import httplib
91import re
92import base64
93import types
94import urlparse
95import os
96import md5
97import mimetypes
98import mimetools
99import ftplib
100import sys
101import time
102import gopherlib
103
104try:
105 from cStringIO import StringIO
106except ImportError:
107 from StringIO import StringIO
108
109try:
110 import sha
111except ImportError:
112 # need 1.5.2 final
113 sha = None
114
115# not sure how many of these need to be gotten rid of
116from urllib import unwrap, unquote, splittype, splithost, \
117 addinfourl, splitport, splitgophertype, splitquery, \
118 splitattr, ftpwrapper, noheaders
119
120# support for proxies via environment variables
121from urllib import getproxies
122
123# support for FileHandler
124from urllib import localhost, thishost, url2pathname, pathname2url
125
126# support for GopherHandler
127from urllib import splitgophertype, splitquery
128
__version__ = "2.0a1"

# Module-wide default opener.  It stays None until urlopen() builds one
# on demand or install_opener() supplies one explicitly.
_opener = None

def urlopen(url, data=None):
    """Open url with the module's default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    The default opener is created with build_opener() the first time
    it is needed and is reused on later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
137
def install_opener(opener):
    """Make opener the module-wide default used by urlopen().

    The object is stored as-is in the module global _opener; no
    validation is performed here.
    """
    global _opener
    _opener = opener
141
142# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000143# make sure all of the IOError stuff is overridden. we just want to be
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000144 # subtypes.
145
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of
    the implementation, so __init__ and __str__ are overridden.

    reason -- the object describing the failure (a message string or
    another exception); stored as the `reason` attribute.
    """

    def __init__(self, reason):
        # IOError.__init__ is deliberately not called; fill in args by
        # hand so generic exception handling still sees the reason.
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple: a bare tuple reason would otherwise
        # be unpacked by the % operator and raise TypeError.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000154
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    The instance is initialised through addinfourl as well, so it
    wraps the response file object `fp` in addition to carrying the
    status `code`, the message `msg` and the headers `hdrs`.
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # Set up the addinfourl side first, then record the error
        # details as plain attributes.
        self.__super_init(fp, hdrs, url)
        self.fp = fp
        self.hdrs = hdrs
        self.msg = msg
        self.code = code
        # XXX
        self.filename = url

    def __str__(self):
        details = (self.code, self.msg)
        return 'HTTP Error %s: %s' % details

    def __del__(self):
        # XXX is this safe?  what if user catches exception, then
        # extracts fp and discards exception?
        response = self.fp
        if response:
            response.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000176
class GopherError(URLError):
    """URLError subclass named for gopher failures; adds no behaviour."""
179
class Request:
    """Encapsulate the state of one URL request.

    url     -- the URL string; a '<URL:...>' wrapper is removed
    data    -- optional request data (None means no data)
    headers -- optional mapping of extra headers; it is copied

    The scheme, host and selector are split off lazily by get_type(),
    get_host() and get_selector().
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        # Copy the mapping so neither the caller's object nor the shared
        # default argument is modified by add_header().
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr[:len(prefix)] == prefix:
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                # Calling the getter is expected to set the missing
                # private attribute as a side effect.
                getattr(self, getter)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach (or replace) the request data."""
        self.data = data

    def has_data(self):
        """Return true if data has been attached to the request."""
        return self.data is not None

    def get_data(self):
        """Return the attached data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, splitting it off on first use.

        Raises ValueError when the URL has no scheme.  (An explicit
        raise is used rather than assert so that the check is still
        performed when Python runs with -O.)
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host (or the full URL after set_proxy)."""
        return self.__r_host

    def set_proxy(self, proxy):
        """Redirect the request through the proxy URL `proxy`.

        The type and host become those of the proxy and the selector
        becomes the original full URL.
        """
        self.__proxy = proxy
        # XXX this code is based on urllib, but it doesn't seem
        # correct.  specifically, if the proxy has a port number then
        # splittype will return the hostname as the type and the port
        # will be include with everything else
        self.type, self.__r_type = splittype(self.__proxy)
        self.host = splithost(self.__r_type)[0]
        self.host = unquote(self.host)
        self.__r_host = self.__original

    def add_header(self, key, val):
        """Set header `key` to `val`, replacing any earlier value."""
        # useful for something like authentication
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000246
class OpenerDirector:
    """Route requests and errors to a collection of handler objects.

    handle_open maps a protocol name to the handlers that have a
    '<protocol>_open' method; handle_error maps a protocol name to a
    dictionary from error kind to handlers.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every method name that looks relevant.

        Methods ending in '_open' are filed in handle_open; methods of
        the form '<proto>_error_<kind>' are filed in handle_error (with
        kind converted to an int when possible).  A handler with no
        such method is not added at all.
        """
        registered = 0
        for meth in get_methods(handler):
            if meth[-5:] == '_open':
                self.handle_open.setdefault(meth[:-5], []).append(handler)
                registered = 1
                continue
            i = meth.find('_')
            j = meth[i+1:].find('_') + i + 1
            if j == -1 or meth[i+1:j] != 'error':
                continue
            kind = meth[j+1:]
            try:
                kind = int(kind)
            except ValueError:
                pass
            by_kind = self.handle_error.setdefault(meth[:i], {})
            by_kind.setdefault(kind, []).append(handler)
            registered = 1
        if registered:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them all."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        """Return the first non-None result of meth_name over chain[kind]."""
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the result.

        The 'default' handlers are tried first, then the handlers for
        the request's own type, then the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if type(fullurl) == types.StringType:
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        call = self._call_chain
        response = call(self.handle_open, 'default', 'default_open', req)
        if not response:
            scheme = req.get_type()
            response = call(self.handle_open, scheme, scheme + '_open', req)
        if not response:
            response = call(self.handle_open, 'unknown', 'unknown_open', req)
        return response

    def error(self, proto, *args):
        """Dispatch an error for protocol proto to the error handlers.

        For 'http' the third extra argument is used as the status code:
        the 'http_error_<code>' handlers are tried first and then the
        'http_error_default' handlers.
        """
        if proto == 'http':
            # XXX http protocol is special cased
            table = self.handle_error[proto]
            code = args[2] # YUCK!
            outcome = self._call_chain(table, code,
                                       'http_error_%d' % code, *args)
            if outcome:
                return outcome
            return self._call_chain(table, 'default',
                                    'http_error_default', *args)

        outcome = self._call_chain(self.handle_error, proto,
                                   proto + '_error', *args)
        if outcome:
            return outcome
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000351
def is_callable(obj):
    """Return true if obj is a function, a method or a callable instance.

    Builtin functions/methods, Python functions (including lambdas)
    and methods give 1.  A class instance gives the result of checking
    it for a __call__ attribute.  Everything else gives 0.
    """
    # not quite like builtin callable (which I didn't know existed),
    # not entirely sure it needs to be different
    callable_types = (types.BuiltinFunctionType, types.BuiltinMethodType,
                      types.LambdaType, types.MethodType)
    kind = type(obj)
    if kind in callable_types:
        return 1
    if kind != types.InstanceType:
        return 0
    return hasattr(obj, '__call__')
362
def get_methods(inst):
    """Return the names of the methods and callable attributes of inst.

    The class of inst and all of its base classes are scanned for
    unbound methods; then every attribute of inst itself that
    is_callable() accepts is added.  The result is a list of names.
    """
    found = {}
    pending = [inst.__class__]
    while pending:
        klass = pending.pop(0)
        # queue the bases behind the classes already waiting
        pending.extend(list(klass.__bases__))
        for name in dir(klass):
            if type(getattr(klass, name)) == types.UnboundMethodType:
                found[name] = 1
    for name in dir(inst):
        if is_callable(getattr(inst, name)):
            found[name] = 1
    return found.keys()
379
380# XXX probably also want an abstract factory that knows things like
381 # the fact that a ProxyHandler needs to get inserted first.
382# would also know when it makes sense to skip a superclass in favor of
Tim Peterse1190062001-01-15 03:34:38 +0000383 # a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000384
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.  Returns the OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    skip = []
    for klass in default_classes:
        for check in handlers:
            if type(check) == types.ClassType:
                overrides = issubclass(check, klass)
            elif type(check) == types.InstanceType:
                overrides = isinstance(check, klass)
            else:
                overrides = 0
            if overrides:
                # Record each default only once: several arguments may
                # override the same default, and removing a class from
                # default_classes twice would raise ValueError.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if type(h) == types.ClassType:
            h = h()
        opener.add_handler(h)
    return opener
420
class BaseHandler:
    """Common base for handlers: tracks the object they are attached to."""

    def add_parent(self, parent):
        """Remember parent as the object this handler belongs to."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
426
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default turns a response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000430
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects, refusing to revisit a URL."""

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    def http_error_302(self, req, fp, code, msg, headers):
        """Open the URL named by the 'location' (or 'uri') header.

        Returns None when the response names no new URL, so that other
        handlers get a chance.  Raises HTTPError when the new URL has
        already been visited in this chain of redirects.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return

        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data())
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if req.error_302_dict.has_key(newurl):
                # HTTPError needs the response file object as its fifth
                # argument, so fp must still be open at this point.
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Only discard the redirect response once we know it will not
        # be handed to HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
466
class ProxyHandler(BaseHandler):
    """Send requests through proxies taken from a {type: proxy URL} mapping.

    For every entry in the mapping a '<type>_open' attribute is created
    on the instance that forwards to proxy_open().
    """

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        forward = self.proxy_open
        for scheme, proxy_url in proxies.items():
            # Default arguments freeze the per-scheme values inside
            # each lambda.
            opener = lambda r, proxy=proxy_url, type=scheme, meth=forward: \
                     meth(r, proxy, type)
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point req at proxy; reopen it if the request type changed."""
        orig_type = req.get_type()
        req.set_proxy(proxy)
        if orig_type != type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000490
491# feature suggested by Duncan Booth
492# XXX custom is not a good name
class CustomProxy:
    """Pair a proxy address with a test deciding which requests use it.

    proto      -- protocol name, stored as `proto`
    func       -- optional callable taking the request
    proxy_addr -- proxy address, returned by get_proxy()
    """
    # either pass a function to the constructor or override handle

    def __init__(self, proto, func=None, proxy_addr=None):
        self.addr = proxy_addr
        self.func = func
        self.proto = proto

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        predicate = self.func
        if not predicate:
            return None
        if predicate(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000506
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from registered CustomProxy-like objects.

    Proxy objects are grouped by their `proto` attribute.  Objects can
    be passed to the constructor or added later with add_proxy().
    """

    def __init__(self, *proxies):
        self.proxies = {}
        # Register the proxies given to the constructor; without this
        # they would be silently discarded.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Reopen req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's type
        or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                req.set_proxy(p.get_proxy())
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Reopen req through the parent; p is accepted but not used."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register the proxy object cpo under its `proto` attribute."""
        if self.proxies.has_key(cpo.proto):
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000532
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI.

    self.passwd maps a realm to a dictionary whose keys are tuples of
    reduced URIs (see reduce_uri) and whose values are (user, passwd).
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Record user/passwd for realm and one or more URIs."""
        # uri could be a single URI or a sequence
        if type(uri) == types.StringType:
            uris = [uri]
        else:
            uris = uri
        key = tuple(map(self.reduce_uri, uris))
        self.passwd.setdefault(realm, {})[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, passwd) for realm and authuri, or (None, None)."""
        target = self.reduce_uri(authuri)
        for uris, authinfo in self.passwd.get(realm, {}).items():
            for uri in uris:
                if self.is_suburi(uri, target):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        netloc, path = parts[1], parts[2]
        if netloc:
            return netloc, path or '/'
        # No netloc was parsed: treat the path itself as the netloc.
        return path, '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return 1
        if base[0] != test[0]:
            return 0
        shared = os.path.commonprefix((base[1], test[1]))
        if len(shared) != len(base[1]):
            return 0
        return 1
Tim Peterse1190062001-01-15 03:34:38 +0000576
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000577
class HTTPBasicAuthHandler(BaseHandler):
    """Answer HTTP 401 responses that ask for Basic authentication.

    Credentials are looked up in an HTTPPasswordMgr; its add_password
    method is exposed directly on the handler.
    """

    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self):
        self.passwd = HTTPPasswordMgr()
        self.add_password = self.passwd.add_password
        self.__current_realm = None
        # if __current_realm is not None, then the server must have
        # refused our name/password and is asking for authorization
        # again.  must be careful to set it to None on successful
        # return.

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req with credentials if the challenge is for Basic auth.

        Returns None (letting other handlers run) when there is no
        usable 'www-authenticate' header or the scheme is not Basic.
        """
        # XXX could be mult. headers
        authreq = headers.get('www-authenticate', None)
        if authreq:
            mo = HTTPBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(req, realm)

    def retry_http_basic_auth(self, req, realm):
        """Reopen req with an Authorization header for realm.

        Returns the new response, or None when no password is known or
        when a retry is already in progress (the server refused the
        credentials we just sent).
        """
        if self.__current_realm is not None:
            # We are being called again from inside our own retry.
            self.__current_realm = realm
            return None
        self.__current_realm = realm
        try:
            # XXX host isn't really the correct URI?
            host = req.get_host()
            user, pw = self.passwd.find_user_password(realm, host)
            if not pw:
                return None
            raw = "%s:%s" % (user, pw)
            # encodestring() breaks long output into lines; a header
            # value must not contain those newlines.
            auth = ''.join(base64.encodestring(raw).split())
            req.add_header('Authorization', 'Basic %s' % auth)
            return self.parent.open(req)
        finally:
            # Always clear the marker, including when the retry raises
            # (e.g. HTTPError after the credentials are rejected);
            # otherwise every later 401 would be refused.
            self.__current_realm = None
623
class HTTPDigestAuthHandler(BaseHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    def __init__(self):
        self.passwd = HTTPPasswordMgr()
        self.add_password = self.passwd.add_password
        # Not None while a retry is in progress; see get_authorization.
        self.__current_realm = None

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req with credentials if the challenge is for Digest auth."""
        # XXX could be mult. headers
        authreq = headers.get('www-authenticate', None)
        if authreq:
            words = authreq.split()
            # Compare case-insensitively, and tolerate a header value
            # that contains only whitespace.
            if words and words[0].lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        """Reopen req with an Authorization header answering `auth`.

        auth is the full 'www-authenticate' value.  Returns the new
        response, or None when no authorization could be computed.
        """
        parts = auth.split(None, 1)
        if len(parts) != 2:
            # scheme name with no challenge parameters
            return None
        chal = parse_keqv_list(parse_http_list(parts[1]))
        # Only the call that starts a retry may clear the marker; a
        # nested call (server refused our answer) must leave it alone.
        outermost = self.__current_realm is None
        try:
            auth = self.get_authorization(req, chal)
            if auth:
                req.add_header('Authorization', 'Digest %s' % auth)
                return self.parent.open(req)
            return None
        finally:
            # Reset even when get_authorization gives up or the retry
            # raises; otherwise every later 401 would be refused.
            if outermost:
                self.__current_realm = None

    def get_authorization(self, req, chal):
        """Return the Digest credentials string for challenge chal.

        chal is a dictionary of challenge parameters.  Returns None if
        required parameters are missing, a retry is already in
        progress, the algorithm is unsupported, or no user is known.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        if self.__current_realm is None:
            self.__current_realm = realm
        else:
            self.__current_realm = realm
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm,
                                                  req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm, or (None, None).

        (None, None) is returned for an algorithm name that is not
        handled and for 'SHA' when the sha module is unavailable.
        """
        if algorithm == 'MD5':
            digest_module = md5
        elif algorithm == 'SHA':
            digest_module = sha     # None if the import failed
        else:
            # XXX MD5-sess
            digest_module = None
        if digest_module is None:
            return None, None
        H = lambda x, e=encode_digest, new=digest_module.new: \
            e(new(x).digest())
        KD = lambda s, d, H=H: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000717
def encode_digest(digest):
    """Return digest as lowercase hex, two digits per character.

    Only the low eight bits of each character's ordinal are used.
    """
    return ''.join(['%02x' % (ord(c) & 0xff) for c in digest])
Tim Peterse1190062001-01-15 03:34:38 +0000726
727
class HTTPHandler(BaseHandler):
    """Open http URLs with httplib.HTTP."""

    def http_open(self, req):
        """Send req and return the response.

        A request with data is sent as a POST with a form-urlencoded
        content type, otherwise as a GET.  A 200 reply is returned as
        an addinfourl object; any other status is passed to the
        parent's error() method.  Raises URLError if the request has no
        host or a socket error occurs while starting the request.
        """
        # XXX devise a new mechanism to specify user/password
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        try:
            h = httplib.HTTP(host) # will parse host:port
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector())
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector())
        except socket.error:
            raise URLError(sys.exc_info()[1])

        # XXX proxies would have different host here
        h.putheader('Host', host)
        for args in self.parent.addheaders:
            h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            # Send exactly the bytes announced in Content-length; a
            # trailing CRLF is not part of the request body.
            h.send(data)

        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if code == 200:
            return addinfourl(fp, hdrs, req.get_full_url())
        else:
            return self.parent.error('http', req, fp, code, msg, hdrs)
764
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open always raises URLError."""

    def unknown_open(self, req):
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
769
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    l is a sequence of 'key=value' strings.  Returns a dictionary; a
    value enclosed in double quotes has the quotes removed, and an
    empty value ('key=') is stored as ''.  An element without '='
    raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices, unlike v[0] and v[-1], are safe on an empty value.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
779
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma, and a backslash inside a quoted-string escapes
    the character that follows it.

    Returns a list of the elements with surrounding whitespace
    removed.  Empty elements are dropped.  An unterminated
    quoted-string extends to the end of s.
    """
    pieces = []
    start = 0       # index where the current element begins
    in_quote = 0
    escaped = 0
    for i in range(len(s)):
        ch = s[i]
        if escaped:
            # character following a backslash: never special
            escaped = 0
        elif in_quote:
            if ch == '\\':
                escaped = 1
            elif ch == '"':
                in_quote = 0
        elif ch == '"':
            in_quote = 1
        elif ch == ',':
            pieces.append(s[start:i])
            start = i + 1
    pieces.append(s[start:])

    result = []
    for piece in pieces:
        piece = piece.strip()
        if piece:
            result.append(piece)
    return result
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000825
class FileHandler(BaseHandler):
    """Handler for file: URLs."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open req locally, or hand it back to the parent as an ftp request.

        A selector that starts with exactly two slashes is re-typed as
        'ftp' and re-dispatched through self.parent.open(); anything
        else goes to open_local_file().
        """
        selector = req.get_selector()
        looks_remote = selector[:2] == '//' and selector[2:3] != '/'
        if not looks_remote:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return the addresses regarded as local, computing them once.

        The tuple holds the address of 'localhost' and the address of
        socket.gethostname(); it is cached on the FileHandler class.
        """
        if FileHandler.names is not None:
            return FileHandler.names
        FileHandler.names = (socket.gethostbyname('localhost'),
                             socket.gethostbyname(socket.gethostname()))
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        The Content-Type header is guessed from the selector and falls
        back to text/plain.  The file is served when the request has no
        host, or when the host carries no port and resolves to one of
        get_names(); otherwise URLError is raised.
        """
        guessed = mimetypes.guess_type(req.get_selector())[0]
        header_text = 'Content-Type: %s\n' % (guessed or 'text/plain')
        headers = mimetools.Message(StringIO(header_text))
        host = req.get_host()
        selector = req.get_selector()
        if host:
            host, port = splitport(host)
        # port is only consulted when host is non-empty, in which case
        # it was bound by the splitport() call above.
        is_local = not host
        if not is_local and not port:
            is_local = socket.gethostbyname(host) in self.get_names()
        if not is_local:
            raise URLError('file not on local host')
        return addinfourl(open(url2pathname(selector), 'rb'),
                          headers, 'file:' + selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000858
859class FTPHandler(BaseHandler):
860 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000861 host = req.get_host()
862 if not host:
863 raise IOError, ('ftp error', 'no host given')
864 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000865 try:
866 host = socket.gethostbyname(host)
867 except socket.error, msg:
868 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000869 host, port = splitport(host)
870 if port is None:
871 port = ftplib.FTP_PORT
872 path, attrs = splitattr(req.get_selector())
873 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000874 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000875 dirs, file = dirs[:-1], dirs[-1]
876 if dirs and not dirs[0]:
877 dirs = dirs[1:]
878 user = passwd = '' # XXX
879 try:
880 fw = self.connect_ftp(user, passwd, host, port, dirs)
881 type = file and 'I' or 'D'
882 for attr in attrs:
883 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000884 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000885 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000886 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000887 fp, retrlen = fw.retrfile(file, type)
888 if retrlen is not None and retrlen >= 0:
889 sf = StringIO('Content-Length: %d\n' % retrlen)
890 headers = mimetools.Message(sf)
891 else:
892 headers = noheaders()
893 return addinfourl(fp, headers, req.get_full_url())
894 except ftplib.all_errors, msg:
895 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000896
897 def connect_ftp(self, user, passwd, host, port, dirs):
898 fw = ftpwrapper(user, passwd, host, port, dirs)
899## fw.ftp.set_debuglevel(1)
900 return fw
901
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper objects around for reuse.

    Wrappers are remembered together with an expiry time; expired
    wrappers are closed and dropped, and the entry closest to expiry is
    dropped when the cache reaches max_conns entries.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}         # key -> ftpwrapper
        self.timeout = {}       # key -> time at which the entry expires
        self.soonest = 0        # smallest value in self.timeout, or 0
        self.delay = 60         # seconds added to "now" on every use
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds an entry stays valid after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which entries start being dropped."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for this target, creating one if needed.

        dirs is part of the cache key because the wrapper is constructed
        for a specific directory list; reusing a wrapper built for a
        different directory would address the wrong location.
        """
        key = user, passwd, host, port, '/'.join(dirs)
        try:
            fw = self.cache[key]
        except KeyError:
            fw = ftpwrapper(user, passwd, host, port, dirs)
            self.cache[key] = fw
        self.timeout[key] = time.time() + self.delay
        # Hold on to the wrapper before pruning: check_cache() may drop
        # this very entry (for example with a zero delay), and looking
        # it up again afterwards would raise KeyError.
        self.check_cache()
        return fw

    def check_cache(self):
        """Drop expired entries, then trim the cache to its size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # Iterate over a copy, entries are deleted inside the loop.
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._reset_soonest()

        # then check the size
        # ">=" rather than "==" so that lowering max_conns below the
        # current size still trims the cache.
        while self.timeout and len(self.cache) >= self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._reset_soonest()

    def _reset_soonest(self):
        # Recompute the earliest expiry; min() of an empty sequence
        # raises ValueError, so fall back to the initial value of 0.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000947
class GopherHandler(BaseHandler):
    """Handler for gopher: URLs."""

    def gopher_open(self, req):
        """Send the request through gopherlib and wrap the reply.

        Uses gopherlib.send_query() when the selector carries a query
        part and gopherlib.send_selector() otherwise.  Raises
        GopherError if the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type is split off the selector but not used further.
        _gtype, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        remainder = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(remainder, host)
        else:
            fp = gopherlib.send_query(remainder, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000964
965#bleck! don't use this yet
class OpenerFactory:
    """Experimental builder for opener objects.

    Only the proxy handlers are installed by build_opener();
    default_handlers, handlers and replacement_handlers are recorded
    here but not consulted by any method of this class.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Register an additional proxy handler (class or instance)."""
        # Build a new list instead of appending in place so that the
        # class-level list shared by all factories is left untouched.
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Register an additional handler on this factory."""
        # New list for the same reason as in add_proxy_handler().
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented; does nothing."""
        pass

    def build_opener(self):
        """Return an OpenerDirector with the proxy handlers installed.

        Entries of proxy_handlers that are classes are instantiated
        with no arguments; instances are added as they are.
        """
        # The opener class is OpenerDirector; the former spelling
        # "OpenerDirectory" named nothing and raised NameError.
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if type(ph) == types.ClassType:
                ph = ph()
            opener.add_handler(ph)
        # Hand the opener back, otherwise the caller cannot use it.
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000990
991if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +0000992 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000993 # are internal to CNRI. Need to set up a public server with the
994 # right authentication configuration for test purposes.
995 if socket.gethostname() == 'bitdiddle':
996 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000997 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000998 localhost = 'localhost'
999 else:
1000 localhost = None
1001 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001002 # Thanks to Fred for finding these!
1003 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1004 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001005
Fred Drake13a2c272000-02-10 17:17:14 +00001006 'file:/etc/passwd',
1007 'file://nonsensename/etc/passwd',
1008 'ftp://www.python.org/pub/tmp/httplib.py',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001009 'ftp://www.python.org/pub/tmp/imageop.c',
1010 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001011 'http://www.espn.com/', # redirect
1012 'http://www.python.org/Spanish/Inquistion/',
1013 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1014 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1015 'http://www.python.org/',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001016 'ftp://prep.ai.mit.edu/welcome.msg',
1017 'ftp://www.python.org/pub/tmp/figure.prn',
1018 'ftp://www.python.org/pub/tmp/interp.pl',
Fred Drake13a2c272000-02-10 17:17:14 +00001019 'http://checkproxy.cnri.reston.va.us/test/test.html',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001020 ]
1021
1022 if localhost is not None:
1023 urls = urls + [
1024 'file://%s/etc/passwd' % localhost,
1025 'http://%s/simple/' % localhost,
1026 'http://%s/digest/' % localhost,
1027 'http://%s/not/found.h' % localhost,
1028 ]
1029
1030 bauth = HTTPBasicAuthHandler()
1031 bauth.add_password('basic_test_realm', localhost, 'jhylton',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001032 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001033 dauth = HTTPDigestAuthHandler()
1034 dauth.add_password('digest_test_realm', localhost, 'jhylton',
1035 'password')
1036
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001037
1038 cfh = CacheFTPHandler()
1039 cfh.setTimeout(1)
1040
1041 # XXX try out some custom proxy objects too!
1042 def at_cnri(req):
Fred Drake13a2c272000-02-10 17:17:14 +00001043 host = req.get_host()
1044 print host
1045 if host[-18:] == '.cnri.reston.va.us':
1046 return 1
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001047 p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1048 ph = CustomProxyHandler(p)
1049
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001050 #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001051
1052 for url in urls:
1053 if type(url) == types.TupleType:
1054 url, req = url
1055 else:
1056 req = None
1057 print url
1058 try:
1059 f = urlopen(url, req)
1060 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001061 print "IOError:", err
1062 except socket.error, err:
1063 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001064 else:
1065 buf = f.read()
1066 f.close()
1067 print "read %d bytes" % len(buf)
1068 print
1069 time.sleep(0.1)