blob: f189b390298fca8ee7b583d749ad1970b0bb461f [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance
and installs the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000090import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000092import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import re
94import base64
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000095import urlparse
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000096import md5
97import mimetypes
98import mimetools
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +000099import rfc822
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000100import ftplib
101import sys
102import time
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000103import os
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104import gopherlib
Moshe Zadka8a18e992001-03-01 08:40:42 +0000105import posixpath
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000106
107try:
108 from cStringIO import StringIO
109except ImportError:
110 from StringIO import StringIO
111
112try:
113 import sha
114except ImportError:
115 # need 1.5.2 final
116 sha = None
117
118# not sure how many of these need to be gotten rid of
119from urllib import unwrap, unquote, splittype, splithost, \
120 addinfourl, splitport, splitgophertype, splitquery, \
121 splitattr, ftpwrapper, noheaders
122
123# support for proxies via environment variables
124from urllib import getproxies
125
126# support for FileHandler
Moshe Zadka8a18e992001-03-01 08:40:42 +0000127from urllib import localhost, url2pathname
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000128
129__version__ = "2.0a1"
130
# The module-wide default opener; created lazily by urlopen() or
# replaced explicitly through install_opener().
_opener = None


def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    If no opener has been installed yet, one is created with
    build_opener() and remembered for later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        # First use: build the default opener and cache it globally.
        opener = _opener = build_opener()
    return opener.open(url, data)


def install_opener(opener):
    """Make opener the default opener used by urlopen()."""
    global _opener
    _opener = opener
141
142# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000143# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000144# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000145
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    URLError is a sub-type of IOError, but it doesn't share any of the
    implementation, so __init__ and __str__ are overridden.

    reason -- whatever object describes the failure (a message string,
              another exception, a tuple, ...).
    """

    def __init__(self, reason):
        # The base-class __init__ is deliberately not called; set args
        # by hand so the exception still carries its argument.
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple: a bare tuple on the right of '%'
        # would be unpacked as the format arguments and raise TypeError.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000154
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    url  -- URL of the failed request (stored as filename)
    code -- HTTP status code
    msg  -- message accompanying the status code
    hdrs -- response headers
    fp   -- file-like object for the response body, or None
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.filename = url
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    def __del__(self):
        # XXX is this safe? what if user catches exception, then
        # extracts fp and discards exception?
        body = self.fp
        if body:
            body.close()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000180
class GopherError(URLError):
    """URLError subtype for the gopher protocol; adds no behavior."""
183
Moshe Zadka8a18e992001-03-01 08:40:42 +0000184
class Request:
    """Encapsulates the state of a single URL request.

    url     -- the URL, optionally wrapped as '<URL:type://host/path>'
    data    -- optional request data (None means "no data")
    headers -- optional mapping of extra headers; its items are copied

    The URL is split lazily: get_type() and get_host() parse it on
    first use and cache the remainder in the private __r_type and
    __r_host attributes.
    """

    def __init__(self, url, data=None, headers={}):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        # Copy the items so that neither the caller's mapping nor the
        # shared default argument is ever mutated by add_header().
        self.headers = {}
        self.headers.update(headers)

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            getter = 'get_' + attr[12:]
            if hasattr(Request, getter):
                getattr(self, getter)()
                # Only hand the value back if the getter really stored
                # it; re-entering getattr() here would otherwise recurse
                # without bound (e.g. when host was assigned by hand).
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def add_data(self, data):
        """Attach data to the request, replacing any previous data."""
        self.data = data

    def has_data(self):
        """Return true if the request carries data (data is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the request data, or None."""
        return self.data

    def get_full_url(self):
        """Return the (unwrapped) URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL type (scheme), parsing the URL on first use.

        Raises ValueError if the URL has no recognizable type.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, parsing the URL on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host part (or the full URL after
        set_proxy() has been called)."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy: replace host and type and
        use the complete original URL as the selector."""
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        """Set header key to val (useful for something like authentication)."""
        self.headers[key] = val
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000246
class OpenerDirector:
    """Dispatches requests and errors to the handlers registered on it.

    Handlers are indexed by the names of their attributes:
    '<protocol>_open' attributes go into handle_open and
    '<protocol>_error_<kind>' attributes go into handle_error.
    """

    def __init__(self):
        self.addheaders = [('User-Agent', "Python-urllib/%s" % __version__)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}

    def add_handler(self, handler):
        """Register handler under every protocol/error name it provides.

        A handler that provides no matching attribute is ignored;
        otherwise it is appended to self.handlers and told about this
        opener through handler.add_parent(self).
        """
        registered = 0
        for attr_name in dir(handler):
            if attr_name[-5:] == '_open':
                protocol = attr_name[:-5]
                self.handle_open.setdefault(protocol, []).append(handler)
                registered = 1
                continue
            # '<proto>_error_<kind>': the middle field must be 'error';
            # <kind> may itself contain further underscores.
            fields = attr_name.split('_', 2)
            if len(fields) == 3 and fields[1] == 'error':
                scheme, kind = fields[0], fields[2]
                try:
                    kind = int(kind)
                except ValueError:
                    # non-numeric kinds (e.g. 'default') stay strings
                    pass
                by_kind = self.handle_error.setdefault(scheme, {})
                by_kind.setdefault(kind, []).append(handler)
                registered = 1
        if registered:
            self.handlers.append(handler)
            handler.add_parent(self)

    def __del__(self):
        self.close()

    def close(self):
        """Close every registered handler and forget them all."""
        for handler in self.handlers:
            handler.close()
        self.handlers = []

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] and
        return the first result that is not None (None if there is none).
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request object).

        The 'default' handlers are tried first, then the handlers for
        the request's own type, and finally the 'unknown' handlers.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)
        assert isinstance(req, Request) # really only care about interface

        response = self._call_chain(self.handle_open, 'default',
                                    'default_open', req)
        if not response:
            scheme = req.get_type()
            response = self._call_chain(self.handle_open, scheme,
                                        scheme + '_open', req)
        if not response:
            response = self._call_chain(self.handle_open, 'unknown',
                                        'unknown_open', req)
        return response

    def error(self, proto, *args):
        """Dispatch an error for protocol proto to the error handlers.

        For 'http' and 'https' the third positional argument is taken
        as the status code: the handlers for that code are tried first
        and the 'default' HTTP error handlers afterwards.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            chain = self.handle_error['http'] # https is not different than http
            code = args[2]  # YUCK!
            outcome = self._call_chain(chain, code,
                                       'http_error_%d' % code, *args)
            if outcome:
                return outcome
            return self._call_chain(chain, 'default',
                                    'http_error_default', *args)

        outcome = self._call_chain(self.handle_error, proto,
                                   proto + '_error', *args)
        if outcome:
            return outcome
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000351
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000352# XXX probably also want an abstract factory that knows things like
Fred Drakea87a5212002-08-13 13:59:55 +0000353# the fact that a ProxyHandler needs to get inserted first.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000354# would also know when it makes sense to skip a superclass in favor of
Fred Drakea87a5212002-08-13 13:59:55 +0000355# a subclass and when it might make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000356
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    handlers -- handler instances or handler classes; classes are
                instantiated without arguments.

    Returns the new OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Keep only the defaults that no supplied handler replaces.  Each
    # default is decided exactly once, so several supplied handlers
    # deriving from the same default cannot cause it to be removed
    # twice (which used to raise ValueError from list.remove()).
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                replaced = issubclass(check, klass)
            else:
                replaced = isinstance(check, klass)
            if replaced:
                break
        else:
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
393
class BaseHandler:
    """Minimal handler base: keeps track of the owning parent object."""

    def add_parent(self, parent):
        """Remember parent (OpenerDirector.add_handler passes itself)."""
        self.parent = parent

    def close(self):
        """Drop the reference to the parent."""
        self.parent = None
399
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turns the response into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError built from the request URL and response."""
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000403
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301 and 302 redirects by opening the new location."""

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.

    # Prefix for the HTTPError message raised on a redirect loop.  Note
    # the trailing space after "would": the fragments are concatenated.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 302 error message was:\n"

    def http_error_302(self, req, fp, code, msg, headers):
        """Re-issue the request against the redirect target.

        The target is taken from the 'location' header (or 'uri' as a
        fallback) and resolved relative to the current URL.  Returns
        None when neither header is present, otherwise the result of
        opening the new request through the parent.

        Raises HTTPError if the target was already visited in this
        redirect chain or the chain has more than 10 entries.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = Request(newurl, req.get_data(), req.headers)
        new.error_302_dict = {}
        if hasattr(req, 'error_302_dict'):
            if len(req.error_302_dict)>10 or \
               newurl in req.error_302_dict:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
            # carry the URLs seen so far over to the new request
            new.error_302_dict.update(req.error_302_dict)
        new.error_302_dict[newurl] = newurl

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_302
443
class ProxyHandler(BaseHandler):
    """Redirect requests through proxies, one proxy URL per URL type.

    proxies -- mapping from URL type to proxy URL; when omitted the
               mapping returned by getproxies() is used.

    For every entry a '<type>_open' attribute is created on the
    instance that forwards to proxy_open().
    """

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Bind url/type/method as defaults so each lambda keeps the
            # values of its own loop iteration.
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy and decide who continues handling it.

        Credentials embedded in the proxy URL ('user:password@host') are
        sent as a Basic 'Proxy-Authorization' header.  Returns None if
        the proxy has the same type as the original request (so the
        remaining handlers process it), otherwise the result of
        re-opening the request through the parent.
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        host, XXX = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                encoded = base64.encodestring('%s:%s' % (unquote(user),
                                                         unquote(password)))
                # encodestring() output contains newlines (always a
                # trailing one); they must not end up inside a header.
                user_pass = encoded.replace('\n', '')
            req.add_header('Proxy-Authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000477
478# feature suggested by Duncan Booth
479# XXX custom is not a good name
class CustomProxy:
    """Pairs a protocol with a predicate and a proxy address.

    proto      -- protocol this proxy applies to
    func       -- optional callable taking a request; a true result
                  means the proxy wants to handle that request
    proxy_addr -- address returned by get_proxy()
    """
    # either pass a function to the constructor or override handle

    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000493
class CustomProxyHandler(BaseHandler):
    """Chooses a proxy for a request from registered CustomProxy objects.

    proxies -- CustomProxy-like objects (providing proto, handle() and
               get_proxy()) to register right away.
    """

    def __init__(self, *proxies):
        self.proxies = {}
        # Register what the caller passed in; these arguments used to
        # be accepted and then silently dropped.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Open req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        type or none of them accepts the request.
        """
        proto = req.get_type()
        for p in self.proxies.get(proto, ()):
            if p.handle(req):
                # Request.set_proxy() takes (host, type); the request
                # keeps its own type.
                # NOTE(review): confirm that keeping the request's type
                # (rather than a proxy-specific one) is what is intended.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Open req through the parent (p is not used)."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its protocol, after any earlier ones."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000518
class HTTPPasswordMgr:
    """Stores (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm and the given URI(s)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        reduced = tuple([self.reduce_uri(one) for one in uri])
        self.passwd.setdefault(realm, {})[reduced] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for realm whose URI
        covers authuri, or (None, None) when nothing matches."""
        candidates = self.passwd.get(realm, {})
        target = self.reduce_uri(authuri)
        for uris, authinfo in candidates.items():
            for registered in uris:
                if self.is_suburi(registered, target):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        netloc, path = parts[1], parts[2]
        if netloc:
            return netloc, path or '/'
        # no netloc was parsed: treat the path component as the netloc
        return path, '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        shared = posixpath.commonprefix((base[1], test[1]))
        return len(shared) == len(base[1])
Tim Peterse1190062001-01-15 03:34:38 +0000562
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000563
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the entries stored under
    realm None when the requested realm has no match."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; if no user is found, look up realm None."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
571
572
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Concrete subclasses supply the auth_header attribute (name of the
    request header carrying the credentials) and a parent attribute
    used to retry the request.

    password_mgr -- object providing add_password() and
                    find_user_password(); an HTTPPasswordMgr is
                    created when omitted.
    """

    # Matches '<scheme> realm="<realm>"' at the start of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with credentials if headers[authreq] is a Basic
        challenge; return None otherwise.

        authreq -- name of the response header holding the challenge
        host    -- value used to look up the stored credentials
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Add Basic credentials for realm/host to req and reopen it.

        Returns None if no credentials are stored, or if req already
        carries exactly these credentials (they were evidently refused,
        so retrying would loop).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        # Compare against None: an empty password is a valid password,
        # only (None, None) means "nothing stored".
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        # encodestring() output contains newlines; strip them all so
        # the header value stays on one line.
        auth = 'Basic %s' % base64.encodestring(raw).replace('\n', '')
        if req.headers.get(self.auth_header, None) == auth:
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
608
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 401 responses that carry a Basic 'www-authenticate'
    challenge."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry with credentials looked up by the URL's network location."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
617
618
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 407 responses that carry a Basic 'proxy-authenticate'
    challenge."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry with credentials looked up by the request's host."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
627
628
629class AbstractDigestAuthHandler:
630
631 def __init__(self, passwd=None):
632 if passwd is None:
Jeremy Hylton54e99e82001-08-07 21:12:25 +0000633 passwd = HTTPPasswordMgr()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000634 self.passwd = passwd
Fred Drake13a2c272000-02-10 17:17:14 +0000635 self.add_password = self.passwd.add_password
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000636
Moshe Zadka8a18e992001-03-01 08:40:42 +0000637 def http_error_auth_reqed(self, authreq, host, req, headers):
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000638 authreq = headers.get(self.auth_header, None)
Fred Drake13a2c272000-02-10 17:17:14 +0000639 if authreq:
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000640 kind = authreq.split()[0]
Fred Drake13a2c272000-02-10 17:17:14 +0000641 if kind == 'Digest':
642 return self.retry_http_digest_auth(req, authreq)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000643
644 def retry_http_digest_auth(self, req, auth):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000645 token, challenge = auth.split(' ', 1)
Fred Drake13a2c272000-02-10 17:17:14 +0000646 chal = parse_keqv_list(parse_http_list(challenge))
647 auth = self.get_authorization(req, chal)
648 if auth:
Jeremy Hylton52a17be2001-11-09 16:46:51 +0000649 auth_val = 'Digest %s' % auth
650 if req.headers.get(self.auth_header, None) == auth_val:
651 return None
652 req.add_header(self.auth_header, auth_val)
Fred Drake13a2c272000-02-10 17:17:14 +0000653 resp = self.parent.open(req)
Fred Drake13a2c272000-02-10 17:17:14 +0000654 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000655
def get_authorization(self, req, chal):
    """Return the Digest credentials string for req, or None.

    chal is the dictionary of parsed challenge parameters.  None is
    returned when the challenge lacks a realm or a nonce, when
    get_algorithm_impls() supplies no hash function for the requested
    algorithm, or when the password manager has no user for the realm
    and URL.
    """
    try:
        realm = chal['realm']
        nonce = chal['nonce']
    except KeyError:
        return None
    algorithm = chal.get('algorithm', 'MD5')
    # mod_digest doesn't send an opaque, even though it isn't
    # supposed to be optional
    opaque = chal.get('opaque', None)

    H, KD = self.get_algorithm_impls(algorithm)
    if H is None:
        return None

    user, pw = self.passwd.find_user_password(realm, req.get_full_url())
    if user is None:
        return None

    # XXX not implemented yet
    entdig = None
    if req.has_data():
        entdig = self.get_entity_digest(req.get_data(), chal)

    # A request that carries data is sent as POST, anything else as GET.
    if req.has_data():
        method = 'POST'
    else:
        method = 'GET'
    # XXX selector: what about proxies and full urls
    selector = req.get_selector()

    A1 = "%s:%s:%s" % (user, realm, pw)
    A2 = "%s:%s" % (method, selector)
    # XXX should the partial digests be encoded too?
    respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))

    base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
           'response="%s"' % (user, realm, nonce, selector, respdig)
    # Optional fields are appended only when they carry a value.
    if opaque:
        base += ', opaque="%s"' % opaque
    if entdig:
        base += ', digest="%s"' % entdig
    if algorithm != 'MD5':
        base += ', algorithm="%s"' % algorithm
    return base
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000699
def get_algorithm_impls(self, algorithm):
    """Return the (H, KD) function pair for a digest algorithm name.

    H(data) returns the hex-encoded hash of data and KD(secret, data)
    returns H("secret:data").  Only 'MD5' and 'SHA' are supported; for
    any other name (None, None) is returned, which get_authorization()
    detects by testing "H is None".
    """
    # lambdas assume digest modules are imported at the top level
    if algorithm == 'MD5':
        H = lambda x, e=encode_digest: e(md5.new(x).digest())
    elif algorithm == 'SHA':
        H = lambda x, e=encode_digest: e(sha.new(x).digest())
    else:
        # XXX MD5-sess
        # Without this branch H would be unbound here and building KD
        # below would raise NameError instead of reporting "unsupported".
        return None, None
    KD = lambda s, d, H=H: H("%s:%s" % (s, d))
    return H, KD
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000709
def get_entity_digest(self, data, chal):
    """Return the entity digest for data under challenge chal.

    XXX not implemented yet: both arguments are ignored and None is
    always returned.
    """
    return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000713
Moshe Zadka8a18e992001-03-01 08:40:42 +0000714
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    header = 'Authorization'
    # retry_http_digest_auth() reads and sets the request header named
    # by self.auth_header, so publish the header name under that
    # attribute as well.
    # NOTE(review): if AbstractDigestAuthHandler already defines
    # auth_header in a part of the file not reviewed here, this alias
    # is redundant -- confirm.
    auth_header = header

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 reply by retrying req with Digest credentials.

        Returns whatever http_error_auth_reqed() returns, so that the
        response obtained by a retry is handed back to the caller
        instead of being dropped.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
727
728
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (407 replies)."""

    header = 'Proxy-Authorization'
    # retry_http_digest_auth() reads and sets the request header named
    # by self.auth_header, so publish the header name under that
    # attribute as well.
    # NOTE(review): if AbstractDigestAuthHandler already defines
    # auth_header in a part of the file not reviewed here, this alias
    # is redundant -- confirm.
    auth_header = header

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 reply by retrying req with Digest credentials.

        Returns whatever http_error_auth_reqed() returns, so that the
        response obtained by a retry is handed back to the caller
        instead of being dropped.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
736
737
def encode_digest(digest):
    """Return digest as a string of lowercase hexadecimal digits.

    Each character of digest contributes two digits: the high four
    bits of its ordinal first, then the low four bits.
    """
    digits = '0123456789abcdef'
    pairs = [digits[(ord(ch) >> 4) & 0xf] + digits[ord(ch) & 0xf]
             for ch in digest]
    return ''.join(pairs)
Tim Peterse1190062001-01-15 03:34:38 +0000746
747
Moshe Zadka8a18e992001-03-01 08:40:42 +0000748class AbstractHTTPHandler(BaseHandler):
749
750 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000751 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000752 if not host:
753 raise URLError('no host given')
754
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000755 try:
Moshe Zadka8a18e992001-03-01 08:40:42 +0000756 h = http_class(host) # will parse host:port
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000757 if req.has_data():
758 data = req.get_data()
759 h.putrequest('POST', req.get_selector())
Raymond Hettinger54f02222002-06-01 14:18:47 +0000760 if not 'Content-type' in req.headers:
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000761 h.putheader('Content-type',
762 'application/x-www-form-urlencoded')
Raymond Hettinger54f02222002-06-01 14:18:47 +0000763 if not 'Content-length' in req.headers:
Moshe Zadkad3f193f2001-03-20 13:14:28 +0000764 h.putheader('Content-length', '%d' % len(data))
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000765 else:
766 h.putrequest('GET', req.get_selector())
767 except socket.error, err:
768 raise URLError(err)
Tim Peterse1190062001-01-15 03:34:38 +0000769
Jeremy Hylton144dea32002-07-07 16:57:35 +0000770 scheme, sel = splittype(req.get_selector())
771 sel_host, sel_path = splithost(sel)
772 h.putheader('Host', sel_host or host)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000773 for args in self.parent.addheaders:
Andrew M. Kuchlingaca49b02002-11-06 15:40:05 +0000774 name, value = args
Jeremy Hylton96f11292002-10-11 17:26:46 +0000775 if name not in req.headers:
776 h.putheader(*args)
Fred Drake13a2c272000-02-10 17:17:14 +0000777 for k, v in req.headers.items():
778 h.putheader(k, v)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000779 h.endheaders()
780 if req.has_data():
Fred Drakeec3dfde2001-07-04 05:18:29 +0000781 h.send(data)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000782
783 code, msg, hdrs = h.getreply()
784 fp = h.getfile()
785 if code == 200:
786 return addinfourl(fp, hdrs, req.get_full_url())
787 else:
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000788 return self.parent.error('http', req, fp, code, msg, hdrs)
789
Moshe Zadka8a18e992001-03-01 08:40:42 +0000790
class HTTPHandler(AbstractHTTPHandler):
    """Open http: requests using httplib.HTTP connections."""

    def http_open(self, req):
        """Return the result of do_open() for req over httplib.HTTP."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
795
796
# HTTPSHandler is defined only when httplib provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: requests using httplib.HTTPS connections."""

        def https_open(self, req):
            """Return the result of do_open() for req over httplib.HTTPS."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
802
803
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open() rejects the request with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the type reported by req.get_type()."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
808
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dictionary mapping each key to its value.  Each element
    is split at its first "="; a value that both starts and ends with
    a double quote has that pair of quotes removed.  An empty value
    ("key=") is stored as the empty string.

    Raises ValueError for an element that contains no "=".
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than v[0] / v[-1]: indexing an empty value
        # would raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
818
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns the list of elements with surrounding whitespace stripped;
    quotes are kept as part of the element.  An empty string gives an
    empty list, and nothing is added for the empty text after a
    trailing comma.  Backslash escapes inside quoted-strings are not
    interpreted: every double quote opens or closes a quoted-string.

    Raises ValueError if s ends inside a quoted-string that contains a
    comma.
    """
    parts = []
    start = 0           # index at which the current element begins
    inquote = 0         # true while scanning inside a quoted-string
    comma_in_quote = 0  # true once the open quoted-string holds a comma
    for pos in range(len(s)):
        ch = s[pos]
        if ch == '"':
            inquote = not inquote
            comma_in_quote = 0
        elif ch == ',':
            if inquote:
                comma_in_quote = 1
            else:
                # A separator: close the current element and start
                # the next one right after the comma.
                parts.append(s[start:pos])
                start = pos + 1
    if inquote and comma_in_quote:
        raise ValueError("unbalanced quotes")
    if start < len(s):
        parts.append(s[start:])
    return [part.strip() for part in parts]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000864
class FileHandler(BaseHandler):
    """Open file: requests, handing some of them on as ftp requests."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open req locally, or re-open it as an ftp request.

        A selector that starts with exactly two slashes is re-typed
        as 'ftp' and handed back to the parent opener.
        """
        selector = req.get_selector()
        if selector[:2] == '//' and selector[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of 'localhost' and of this host's name.

        The pair is looked up once and then kept on the class.
        """
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl object for the local file named by req.

        Raises URLError when req names a host with a port, or a host
        whose address is not one of get_names().  Errors from
        os.stat() and open() are not caught here.
        """
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = rfc822.formatdate(stats.st_mtime)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # port is only bound, and only examined, when a host was given.
        if host and (port or
                     socket.gethostbyname(host) not in self.get_names()):
            raise URLError('file not on local host')
        return addinfourl(open(localfile, 'rb'),
                          headers, 'file:'+selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000902
903class FTPHandler(BaseHandler):
904 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +0000905 host = req.get_host()
906 if not host:
907 raise IOError, ('ftp error', 'no host given')
908 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +0000909 try:
910 host = socket.gethostbyname(host)
911 except socket.error, msg:
912 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +0000913 host, port = splitport(host)
914 if port is None:
915 port = ftplib.FTP_PORT
916 path, attrs = splitattr(req.get_selector())
917 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000918 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +0000919 dirs, file = dirs[:-1], dirs[-1]
920 if dirs and not dirs[0]:
921 dirs = dirs[1:]
922 user = passwd = '' # XXX
923 try:
924 fw = self.connect_ftp(user, passwd, host, port, dirs)
925 type = file and 'I' or 'D'
926 for attr in attrs:
927 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000928 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +0000929 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +0000930 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +0000931 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +0000932 headers = ""
933 mtype = mimetypes.guess_type(req.get_full_url())[0]
934 if mtype:
935 headers += "Content-Type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +0000936 if retrlen is not None and retrlen >= 0:
Guido van Rossum833a8d82001-08-24 13:10:13 +0000937 headers += "Content-Length: %d\n" % retrlen
938 sf = StringIO(headers)
939 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +0000940 return addinfourl(fp, headers, req.get_full_url())
941 except ftplib.all_errors, msg:
942 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000943
944 def connect_ftp(self, user, passwd, host, port, dirs):
945 fw = ftpwrapper(user, passwd, host, port, dirs)
946## fw.ftp.set_debuglevel(1)
947 return fw
948
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps its ftpwrapper objects for later requests."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # (user, passwd, host, port) -> ftpwrapper
        self.timeout = {}    # same key -> time.time() value of expiry
        self.soonest = 0     # smallest value in self.timeout, 0 if none
        self.delay = 60      # added to time.time() on every use of a key
        self.max_conns = 16  # cache size at which one entry is dropped

    def setTimeout(self, t):
        """Set the delay added to the current time on each use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which one entry is dropped."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for the key, creating it if needed.

        The key is (user, passwd, host, port); dirs is used only when
        a new ftpwrapper has to be created.
        """
        key = user, passwd, host, port
        if key in self.cache:
            fw = self.cache[key]
        else:
            fw = self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        # Return the local reference: check_cache() may have removed
        # this very entry (for instance when max_conns is 1), and a
        # second lookup would then raise KeyError.
        # NOTE(review): an entry removed as expired has already been
        # closed; presumably that only happens here when the delay is
        # zero or negative -- confirm how ftpwrapper copes with that.
        return fw

    def check_cache(self):
        """Close and drop expired entries, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # NOTE(review): the indentation of this statement is ambiguous
        # in the source as received; it is kept at method level so that
        # soonest is refreshed on every call -- confirm.
        self.soonest = self._soonest_timeout()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._soonest_timeout()

    def _soonest_timeout(self):
        """Return the smallest expiry time, or 0 when nothing is cached."""
        # min() of an empty sequence raises ValueError.
        if not self.timeout:
            return 0
        return min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000994
class GopherHandler(BaseHandler):
    """Open gopher: requests through gopherlib."""

    def gopher_open(self, req):
        """Return an addinfourl object for the gopher item named by req.

        A selector that carries a query is sent with send_query(),
        any other with send_selector().  Raises GopherError when req
        has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type is split off the selector and not used further.
        gtype, selector = splitgophertype(req.get_selector())
        selector, query = splitquery(selector)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001011
# bleck! don't use this yet
class OpenerFactory:
    """Unfinished builder of OpenerDirector objects.

    Only proxy_handlers take part in build_opener(); default_handlers,
    handlers and replacement_handlers are recorded but not consulted.
    """

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    proxy_handlers = [ProxyHandler]
    handlers = []
    replacement_handlers = []

    def add_proxy_handler(self, ph):
        """Add ph (a handler class or instance) to this factory's proxies."""
        # Build a new list so the list shared on the class is untouched.
        self.proxy_handlers = self.proxy_handlers + [ph]

    def add_handler(self, h):
        """Add h to this factory's handlers."""
        # Build a new list so the list shared on the class is untouched.
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented yet; does nothing."""
        pass

    def build_opener(self):
        """Return a new OpenerDirector holding the proxy handlers.

        Entries of proxy_handlers that are classes are instantiated
        first.  The opener used to be built and then dropped; it is
        now returned.
        """
        opener = OpenerDirector()
        for ph in self.proxy_handlers:
            if inspect.isclass(ph):
                ph = ph()
            opener.add_handler(ph)
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001037
1038if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001039 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001040 # are internal to CNRI. Need to set up a public server with the
1041 # right authentication configuration for test purposes.
1042 if socket.gethostname() == 'bitdiddle':
1043 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001044 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001045 localhost = 'localhost'
1046 else:
1047 localhost = None
1048 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001049 # Thanks to Fred for finding these!
1050 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1051 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001052
Fred Drake13a2c272000-02-10 17:17:14 +00001053 'file:/etc/passwd',
1054 'file://nonsensename/etc/passwd',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001055 'ftp://www.python.org/pub/python/misc/sousa.au',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001056 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001057 'http://www.espn.com/', # redirect
1058 'http://www.python.org/Spanish/Inquistion/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001059 ('http://www.python.org/cgi-bin/faqw.py',
Fred Drake13a2c272000-02-10 17:17:14 +00001060 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1061 'http://www.python.org/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001062 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001063 ]
1064
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001065## if localhost is not None:
1066## urls = urls + [
1067## 'file://%s/etc/passwd' % localhost,
1068## 'http://%s/simple/' % localhost,
1069## 'http://%s/digest/' % localhost,
1070## 'http://%s/not/found.h' % localhost,
1071## ]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001072
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001073## bauth = HTTPBasicAuthHandler()
1074## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1075## 'password')
1076## dauth = HTTPDigestAuthHandler()
1077## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1078## 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001079
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001080
1081 cfh = CacheFTPHandler()
1082 cfh.setTimeout(1)
1083
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001084## # XXX try out some custom proxy objects too!
1085## def at_cnri(req):
1086## host = req.get_host()
1087## print host
1088## if host[-18:] == '.cnri.reston.va.us':
1089## return 1
1090## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1091## ph = CustomProxyHandler(p)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001092
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001093## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1094 install_opener(build_opener(cfh, GopherHandler))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001095
1096 for url in urls:
Walter Dörwald65230a22002-06-03 15:58:32 +00001097 if isinstance(url, tuple):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001098 url, req = url
1099 else:
1100 req = None
1101 print url
1102 try:
1103 f = urlopen(url, req)
1104 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001105 print "IOError:", err
1106 except socket.error, err:
1107 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001108 else:
1109 buf = f.read()
1110 f.close()
1111 print "read %d bytes" % len(buf)
1112 print
1113 time.sleep(0.1)