blob: a4046d30c237fccf0286459b5f4586f69b39d78d [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
URLError -- a subclass of IOError; individual protocols have their own
specific subclasses.

HTTPError -- also a valid HTTP response, so you can treat an HTTP error
as an exceptional event or as a valid response.
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
# XXX issues:
# If an authentication error handler tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows that the problem was, e.g., that it didn't know
# the hash algo that was requested in the challenge, it would be good to
# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000090import base64
91import ftplib
92import gopherlib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000094import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000095import md5
96import mimetypes
97import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import os
99import posixpath
100import random
101import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000102import sha
103import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104import sys
105import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000106import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000107import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000108import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000109
110try:
111 from cStringIO import StringIO
112except ImportError:
113 from StringIO import StringIO
114
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000115# not sure how many of these need to be gotten rid of
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000116from urllib import (unwrap, unquote, splittype, splithost,
117 addinfourl, splitport, splitgophertype, splitquery,
118 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000119
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000120# support for FileHandler, proxies via environment variables
121from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000122
Jeremy Hylton023518a2003-12-17 18:52:16 +0000123__version__ = "2.4"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000124
# Module-wide default opener; created lazily by urlopen() and replaced by
# install_opener().
_opener = None


def urlopen(url, data=None):
    """Open *url* using the module-wide default opener.

    The default opener is built with build_opener() the first time it is
    needed and cached in the module-level ``_opener``.  *url* and *data*
    are handed unchanged to the opener's open() method, whose result is
    returned.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
131
def install_opener(opener):
    """Make *opener* the module-wide default used by urlopen().

    The object is stored as-is in the module-level ``_opener``; no
    validation is performed here.
    """
    global _opener
    _opener = opener
135
136# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000137# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000138# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000139
class URLError(IOError):
    """Error raised by this module; the cause is kept in ``reason``.

    URLError is a sub-type of IOError but deliberately shares none of its
    implementation: __init__ and __str__ are both overridden and the base
    constructor is not called.  ``args`` is still populated for
    compatibility with other EnvironmentError subclasses, although it does
    not follow the usual (errno, strerror) layout -- it is the one-element
    tuple ``(reason,)``.
    """

    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000152
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""

    def __init__(self, url, code, msg, hdrs, fp):
        # Plain attributes are always set, whether or not a body is present.
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file object.
        # In some cases the HTTPError may not have one; the simplest
        # workaround is then to leave the base classes uninitialized.
        if fp is None:
            return
        addinfourl.__init__(self, fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000172
class GopherError(URLError):
    """Marker subclass of URLError; adds no state or behavior of its own.

    NOTE(review): presumably raised by the gopher handler, which is not
    visible in this part of the file -- confirm against its caller.
    """
175
Moshe Zadka8a18e992001-03-01 08:40:42 +0000176
class Request:
    """Encapsulates the state of a single request.

    The state can be as simple as the URL; it may also carry data to send
    and extra headers.  The URL is split lazily: ``type`` is computed by
    get_type(), ``host`` by get_host(), and the private ``__r_type`` /
    ``__r_host`` remainders are filled in as a side effect of those calls.

    Header names are normalized with str.capitalize() when stored, so
    lookups through has_header()/get_header() must use the same form
    (e.g. ``"Content-type"``).
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        """Create a request for *url*.

        url             -- URL string; a ``<URL:...>`` wrapper is stripped.
        data            -- optional payload; when not None, get_method()
                           reports "POST".
        headers         -- optional mapping of header name to value; every
                           entry is stored through add_header().  None
                           means "no headers".
        origin_req_host -- defaults to cookielib.request_host(self).
        unverifiable    -- flag returned by is_unverifiable().
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # A None default (instead of a shared ``{}``) keeps callers from
        # ever aliasing one dictionary across Request instances.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        # request_host() inspects this object, so it must run only after
        # the URL and both header dictionaries are in place.
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        """Lazily compute the private ``__r_*`` URL remainders.

        This is a fallback that guards against the get_*() methods being
        called in a non-standard order: a missing ``_Request__r_<name>``
        triggers ``get_<name>()``, which is expected to set it.

        Raises AttributeError for any other missing attribute, and also
        when the getter ran but did not create the attribute.
        """
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter_name = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter_name):
                getattr(self, getter_name)()
                # Read the instance dict directly.  Calling getattr() again
                # would re-enter this method and recurse without bound if
                # the getter did not set the attribute (for example when
                # ``type`` was assigned by hand, so get_type() never split
                # the URL).
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" when the request carries data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Attach (or replace) the request payload."""
        self.data = data

    def has_data(self):
        """Return True when a payload has been set (data is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the payload, or None."""
        return self.data

    def get_full_url(self):
        """Return the URL given to the constructor, after unwrap()."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, splitting the URL on first use.

        Raises ValueError when the URL has no scheme.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host (or the full URL after set_proxy)."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy.

        The host and type are replaced by the proxy's, and the selector
        becomes the complete original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        """Return the origin request host recorded at construction."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the ``unverifiable`` flag recorded at construction."""
        return self.unverifiable

    def add_header(self, key, val):
        """Store a header under its capitalized name."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that is kept separate from ``headers``."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is in either header dictionary."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers win over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs of both dictionaries merged.

        On a name clash the regular header overrides the unredirected one.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000277
class OpenerDirector:
    """Composite that dispatches a request to its registered handlers.

    Handlers are indexed by the names of their methods (see add_handler):
    ``handle_open`` and ``handle_error`` hold the openers and error
    handlers, ``process_request`` and ``process_response`` hold the pre-
    and post-processors.  ``handlers`` lists every handler that registered
    at least one such method.
    """

    def __init__(self):
        self.addheaders = [('User-agent',
                            "Python-urllib/%s" % __version__)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every method name that matches.

        Each name from dir(handler) is split at its first underscore into
        ``protocol`` and ``condition``:

        * ``<protocol>_error_<kind>`` goes into handle_error[protocol],
          with ``kind`` converted to int when possible;
        * ``<protocol>_open`` goes into handle_open;
        * ``<protocol>_request`` / ``<protocol>_response`` go into
          process_request / process_response.

        Lists are kept ordered with bisect.insort, i.e. by the handlers'
        own ``<`` comparison.  A handler that matched at least once is also
        added to ``handlers`` and told about its parent.
        """
        registered = False
        for name in dir(handler):
            sep = name.find("_")
            protocol, condition = name[:sep], name[sep + 1:]

            if condition.startswith("error"):
                # Everything after the underscore that follows "error".
                kind = name[sep + condition.find("_") + 2:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                table = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind, table = protocol, self.handle_open
            elif condition == "response":
                kind, table = protocol, self.process_response
            elif condition == "request":
                kind, table = protocol, self.process_request
            else:
                continue

            bucket = table.setdefault(kind, [])
            if not bucket:
                bucket.append(handler)
            else:
                bisect.insort(bucket, handler)
            registered = True

        if registered:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        """Do nothing; only exists for backwards compatibility."""
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call ``meth_name(*args)`` on each handler in chain[kind].

        Returns the first result that is not None, or None when every
        handler declined (or none is registered for *kind*).
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome
        return None

    def open(self, fullurl, data=None):
        """Open a URL string or Request and return the processed response.

        Request pre-processors for the URL's protocol run first, then the
        open handlers (see _open), then the response post-processors.
        """
        # accept a URL or a Request object
        if not isinstance(fullurl, basestring):
            req = fullurl
            if data is not None:
                req.add_data(data)
        else:
            req = Request(fullurl, data)

        protocol = req.get_type()

        # pre-process request
        request_hook = protocol + "_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol + "_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' openers.

        The first chain producing a true result wins; the result of the
        'unknown' chain is returned as-is.
        """
        opened = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if opened:
            return opened

        # Looked up only now, after the default openers have run.
        protocol = req.get_type()
        opened = self._call_chain(self.handle_open, protocol,
                                  protocol + '_open', req)
        if opened:
            return opened

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for *proto*.

        For 'http' and 'https' the handlers are selected by args[2] and
        invoked as ``http_error_<args[2]>``; if none returns a true value
        the ``http_error_default`` handlers are tried.  For any other
        protocol ``<proto>_error`` is used.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            kind = args[2]  # YUCK!
            meth_name = 'http_error_%s' % kind
        else:
            table = self.handle_error
            kind = proto
            meth_name = proto + '_error'

        outcome = self._call_chain(table, kind, meth_name, *args)
        if outcome:
            return outcome

        if is_http:
            return self._call_chain(table, 'default',
                                    'http_error_default', *args)
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000403
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000404# XXX probably also want an abstract factory that knows when it makes
405# sense to skip a superclass in favor of a subclass and when it might
406# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000407
def _replaces_default(candidate, klass):
    """Return True if *candidate* (a class or an instance) derives from *klass*."""
    if inspect.isclass(candidate):
        return issubclass(candidate, klass)
    return isinstance(candidate, klass)


def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.  HTTPS support is added when httplib provides it.

    *handlers* may contain handler instances or handler classes; classes
    are instantiated without arguments.  If any of the handlers passed
    as arguments are subclasses (or instances of subclasses) of a default
    handler, that default handler will not be used.  The remaining
    defaults are installed first, followed by *handlers* in the order
    given.

    Returns the new OpenerDirector.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    for klass in default_classes:
        # Decide per default class instead of collecting a removal list:
        # a list gets the same class twice when several supplied handlers
        # override it, and removing it the second time raises ValueError.
        overridden = False
        for check in handlers:
            if _replaces_default(check, klass):
                overridden = True
                break
        if not overridden:
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
443
class BaseHandler:
    """Common base for handlers.

    ``handler_order`` is the sort key compared by __lt__; lower values
    sort first.
    """

    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler was added to."""
        self.parent = parent

    def close(self):
        """Do nothing; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Order handlers by ``handler_order``.

        An object without ``handler_order`` always compares as greater.
        This tries to preserve the old behavior of having custom classes
        inserted after default ones (it only matters for custom user
        classes which are not aware of handler_order).
        """
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000461
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000462
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses.

    Registered as a response processor for both http and https.
    """
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response*, or the result of the parent's error chain.

        Every status code in the 2xx range is treated as success and the
        response is passed through untouched.  Any other code is handed to
        ``self.parent.error('http', ...)`` and whatever that returns (or
        raises) replaces the response.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # Testing only for 200 and 206 turned other successful replies
        # (201 Created, 204 No Content, ...) into errors.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
477
class HTTPDefaultErrorHandler(BaseHandler):
    """Turns an HTTP error into a raised HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Always raise HTTPError built from the request URL and response."""
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000481
class HTTPRedirectHandler(BaseHandler):
    """Follows HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Prefix of the HTTPError message raised when a loop is detected.
    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        follow_read = (code in (301, 302, 303, 307)
                       and method in ("GET", "HEAD"))
        # Strictly (according to RFC 2616), 301 or 302 in response
        # to a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we
        # do the same.
        follow_post = code in (301, 302, 303) and method == "POST"
        if not (follow_read or follow_post):
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
        return Request(newurl,
                       headers=req.headers,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a redirect and return the response of the new request.

        Returns None when the response names no target or when
        redirect_request() declines; raises HTTPError when a redirect
        loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for field in ('location', 'uri'):
            if field in headers:
                newurl = headers.getheaders(field)[0]
                break
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if not hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict = {}
        else:
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000561
class ProxyHandler(BaseHandler):
    """Redirects requests through the proxies configured per URL type."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one ``<type>_open`` method per configured proxy.

        proxies -- mapping of URL type to proxy URL; defaults to the
                   result of getproxies().
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Default arguments bind the current url/scheme to each lambda;
            # a plain closure would see only the values of the last pass.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*.

        The proxy URL may embed credentials as ``user:password@host``; they
        are sent in a Proxy-authorization header.  The *type* argument is
        not consulted: the type actually used is the scheme parsed from
        *proxy*.

        Returns None when the proxy has the same type as the request, so
        that later handlers open the modified request; otherwise reopens
        the request through the parent.
        """
        orig_type = req.get_type()
        proxy_type, r_type = splittype(proxy)
        host, _ = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                # b64encode, unlike encodestring, neither appends a newline
                # nor wraps long input; either would corrupt the header.
                user_pass = base64.b64encode(
                    '%s:%s' % (unquote(user), unquote(password)))
            # NOTE(review): credentials without a ':' are sent unchanged,
            # as the original code did -- confirm that this is intended.
            req.add_header('Proxy-authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000598
599# feature suggested by Duncan Booth
600# XXX custom is not a good name
class CustomProxy:
    """Pairs a protocol with a proxy address and an optional predicate."""

    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if the predicate accepts *req*, otherwise None.

        Without a predicate the result is always None.
        """
        if not self.func:
            return None
        if self.func(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000614
class CustomProxyHandler(BaseHandler):
    """Chooses a proxy per request from registered CustomProxy objects."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        """Register every CustomProxy passed in.

        Previously the arguments were accepted but silently discarded.
        """
        self.proxies = {}
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Send *req* through the first proxy that accepts it.

        Proxies registered for the request's protocol are asked in order.
        Returns the result of reopening the request through the parent, or
        None when no proxy is registered or none accepts the request.
        """
        proto = req.get_type()
        for p in self.proxies.get(proto, ()):
            if p.handle(req):
                # Request.set_proxy() takes (host, type).  A CustomProxy
                # carries only an address, so the request keeps its own
                # type.  NOTE(review): confirm this is the intended type.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        """Reopen *req* through the parent; *p* is not used."""
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Append *cpo* to the list of proxies for its protocol."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000642
class HTTPPasswordMgr:
    """Stores (user, password) pairs keyed by realm and URI.

    ``passwd`` maps a realm to a dictionary whose keys are tuples of
    reduced URIs (see reduce_uri) and whose values are (user, password).
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* and one or more URIs.

        uri could be a single URI or a sequence.
        """
        if isinstance(uri, basestring):
            uri = [uri]
        reduced = tuple(map(self.reduce_uri, uri))
        self.passwd.setdefault(realm, {})[reduced] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        A registration matches when *authuri* equals one of its URIs or
        lies below it (see is_suburi).  Returns (None, None) when nothing
        matches.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # No netloc was parsed: treat the path component as the netloc.
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A character-wise common prefix
        # would accept "/foobar" as being below "/foo" and so hand the
        # credentials to an unrelated sibling path.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000686
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000687
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to entries stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up credentials for realm, then for the None realm.

        The realm-specific answer wins whenever it names a user; otherwise
        the result of the lookup under realm None is returned as is.
        """
        lookup = HTTPPasswordMgr.find_user_password
        found_user, found_password = lookup(self, realm, authuri)
        if found_user is None:
            return lookup(self, None, authuri)
        return found_user, found_password
696
697
class AbstractBasicAuthHandler:
    """Shared logic for answering a Basic authentication challenge.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` (the opener used
    to retry the request).
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use password_mgr, or a fresh HTTPPasswordMgr when none is given."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the manager's add_password directly on the handler.
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq names the challenge header to read from headers.  Returns
        the retried response, or None when the header is missing, does not
        match the expected pattern, or names a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with an added Basic credentials header.

        Returns None when no password is known for the realm/URL or when
        the request already carries exactly these credentials (retrying
        would only fail again).
        """
        # TODO(jhylton): Remove the host argument? It depends on whether
        # retry_http_basic_auth() is consider part of the public API.
        # It probably is.
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        # b64encode never inserts line breaks; encodestring() wraps its
        # output every 76 characters, which put a newline in the middle of
        # the header value for long credentials.
        auth = 'Basic %s' % base64.b64encode(raw).strip()
        if req.headers.get(self.auth_header, None) == auth:
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
736
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses using Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by delegating to the shared Basic-auth retry logic."""
        # The host handed on is the network location of the requested URL.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', netloc,
                                          req, headers)
745
746
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses using Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by delegating to the shared Basic-auth retry logic."""
        request_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', request_host,
                                          req, headers)
755
756
def randombytes(n):
    """Return n random bytes.

    The operating system's randomness source is used through os.urandom()
    where one exists.  When the platform has none (os.urandom raises
    NotImplementedError) the bytes come from the random module instead.
    """
    try:
        # os.urandom opens and closes its source itself, so no file handle
        # can be left open if reading fails.
        return os.urandom(n)
    except NotImplementedError:
        return "".join([chr(random.randrange(0, 256)) for i in range(n)])
770
class AbstractDigestAuthHandler:
    """Shared logic for answering a Digest authentication challenge.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` (the opener used
    to retry the request).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use passwd as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of challenges answered since the last reset.
        self.retried = 0
        # Counter sent as the "nc" value when qop is "auth".
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget how many challenges have been answered so far."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req with Digest credentials if the challenge asks for them.

        Raises HTTPError once more than five attempts have been made
        without a reset, and ValueError when the challenge names a scheme
        other than Digest.  Returns None when the challenge header is
        missing or empty.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Re-open req with a Digest credentials header built from auth.

        Returns None when no credentials could be computed or when the
        request already carries exactly the computed header.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the credentials string answering the parsed challenge chal.

        Returns None when the challenge lacks a realm or nonce, names an
        algorithm or qop value this class cannot handle, or when no user
        is known for the realm and URL.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            # No response digest can be computed for this qop.  Decline
            # instead of falling through with respdig unbound, which used
            # to surface as an UnboundLocalError below.
            return None

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        H hashes a string to a hex digest; KD hashes "secret:data".
        Returns (None, None) for an algorithm other than 'MD5' or 'SHA',
        which is the signal get_authorization() checks for.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000904
Moshe Zadka8a18e992001-03-01 08:40:42 +0000905
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 challenge, then clear the retry counter."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        outcome = self.http_error_auth_reqed('www-authenticate', netloc,
                                             req, headers)
        # Reached only when the attempt above did not raise.
        self.reset_retry_count()
        return outcome
Moshe Zadka8a18e992001-03-01 08:40:42 +0000921
922
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses using Digest credentials."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 challenge, then clear the retry counter."""
        request_host = req.get_host()
        outcome = self.http_error_auth_reqed('proxy-authenticate',
                                             request_host, req, headers)
        # Reached only when the attempt above did not raise.
        self.reset_retry_count()
        return outcome
Tim Peterse1190062001-01-15 03:34:38 +0000933
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and response wrapping shared by HTTP(S) handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level handed to each connection object."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in headers the request lacks and return the request.

        Raises URLError when the request has no host.
        """
        req_host = request.get_host()
        if not req_host:
            raise URLError('no host given')

        if request.has_data():  # POST
            payload = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type', 'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(payload))

        # A host embedded in the selector takes precedence over the
        # request's own host for the Host header.
        _scheme, remainder = splittype(request.get_selector())
        selector_host, _selector_path = splithost(remainder)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', selector_host or req_host)

        # Opener-wide default headers never override ones already set.
        for header_name, header_value in self.parent.addheaders:
            header_name = header_name.capitalize()
            if not request.has_header(header_name):
                request.add_unredirected_header(header_name, header_value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        target = req.get_host()
        if not target:
            raise URLError('no host given')

        conn = http_class(target)  # will parse host:port
        conn.set_debuglevel(self._debuglevel)

        # Unredirected headers are layered over the regular ones.
        outgoing = dict(req.headers)
        outgoing.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        outgoing["Connection"] = "close"
        try:
            conn.request(req.get_method(), req.get_selector(), req.data,
                         outgoing)
            raw = conn.getresponse()
        except socket.error:  # XXX what error?
            raise URLError(sys.exc_info()[1])

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        raw.recv = raw.read
        wrapped = socket._fileobject(raw)

        result = addinfourl(wrapped, raw.msg, req.get_full_url())
        result.code = raw.status
        result.msg = raw.reason
        return result
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001018
Moshe Zadka8a18e992001-03-01 08:40:42 +00001019
class HTTPHandler(AbstractHTTPHandler):
    """Open http: requests with httplib.HTTPConnection."""

    # Request preparation is the shared implementation of the base class.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001026
# The handler is only defined when httplib exposes an HTTPS attribute.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: requests with httplib.HTTPSConnection."""

        # Request preparation is the shared implementation of the base class.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1034
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use cookiejar, or a new cookielib.CookieJar when none is given."""
        jar = cookiejar
        if jar is None:
            jar = cookielib.CookieJar()
        self.cookiejar = jar

    def http_request(self, request):
        """Let the jar add its cookie header, then pass the request on."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies, then pass the response on."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001051
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming the request's type."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.get_type())
1056
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value.  An empty value
    ('key=') is stored as ''.  An element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexing: v[0] raised IndexError when the
        # value was empty.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1066
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  Inside a quoted-string a backslash escapes the
    next character: the backslash is dropped and the character is kept
    literally, so an escaped quote does not end the string.

    Returns the elements as a list with surrounding whitespace removed.
    Unbalanced quotes are tolerated: the rest of the input simply
    becomes part of the last element.
    """
    # The earlier index-based scan did not advance its start offset after
    # an unquoted element, so 'a, b, c' came back as
    # ['a', 'a, b', 'a, b, c'].  A single pass over the characters avoids
    # that bookkeeping altogether.
    elements = []
    current = ''
    escaped = False
    in_quote = False

    for ch in s:
        if escaped:
            current += ch
            escaped = False
            continue
        if in_quote:
            if ch == '\\':
                escaped = True
                continue
            if ch == '"':
                in_quote = False
            current += ch
            continue
        if ch == ',':
            elements.append(current)
            current = ''
            continue
        if ch == '"':
            in_quote = True
        current += ch

    # Whatever follows the last comma is the final element.
    if current:
        elements.append(current)

    return [element.strip() for element in elements]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001112
class FileHandler(BaseHandler):
    """Serve file: requests from local files, or re-dispatch them as ftp."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        selector = req.get_selector()
        # '//host/...' (but not '///...') is handed back to the opener
        # as an ftp request.
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of this machine, resolved once per class."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own_address = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the file named by req and return it wrapped in addinfourl.

        Raises URLError when the request names a host that does not
        resolve to this machine, or names a port.
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        stats = os.stat(localfile)
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', stats.st_size, modified))
        headers = mimetools.Message(StringIO(header_text))

        # No host at all counts as local.
        is_local = not host
        if not is_local:
            host, port = splitport(host)
            is_local = (not host) or (
                not port and
                socket.gethostbyname(host) in self.get_names())
        if is_local:
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:' + selector)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001151
1152class FTPHandler(BaseHandler):
1153 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001154 host = req.get_host()
1155 if not host:
1156 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001157 host, port = splitport(host)
1158 if port is None:
1159 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001160 else:
1161 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001162
1163 # username/password handling
1164 user, host = splituser(host)
1165 if user:
1166 user, passwd = splitpasswd(user)
1167 else:
1168 passwd = None
1169 host = unquote(host)
1170 user = unquote(user or '')
1171 passwd = unquote(passwd or '')
1172
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001173 try:
1174 host = socket.gethostbyname(host)
1175 except socket.error, msg:
1176 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001177 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001178 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001179 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001180 dirs, file = dirs[:-1], dirs[-1]
1181 if dirs and not dirs[0]:
1182 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001183 try:
1184 fw = self.connect_ftp(user, passwd, host, port, dirs)
1185 type = file and 'I' or 'D'
1186 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001187 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001188 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001189 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001190 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001191 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001192 headers = ""
1193 mtype = mimetypes.guess_type(req.get_full_url())[0]
1194 if mtype:
Brett Cannon783eaf42003-06-17 21:52:34 +00001195 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001196 if retrlen is not None and retrlen >= 0:
Brett Cannon783eaf42003-06-17 21:52:34 +00001197 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001198 sf = StringIO(headers)
1199 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001200 return addinfourl(fp, headers, req.get_full_url())
1201 except ftplib.all_errors, msg:
1202 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001203
1204 def connect_ftp(self, user, passwd, host, port, dirs):
1205 fw = ftpwrapper(user, passwd, host, port, dirs)
1206## fw.ftp.set_debuglevel(1)
1207 return fw
1208
class CacheFTPHandler(FTPHandler):
    """FTPHandler that reuses ftpwrapper connections between requests."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        # key -> ftpwrapper, and key -> time after which it is expired
        self.cache = {}
        self.timeout = {}
        # Earliest expiry time among the cached entries (0 when empty).
        self.soonest = 0
        # Seconds added to the current time on every use of an entry.
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds an entry stays valid after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is dropped."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for the target, creating one if needed."""
        key = user, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        fw = self.cache[key]
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        # Return the reference taken above: check_cache() may have dropped
        # this very entry (for instance when max_conns is 1), and looking
        # it up again would raise KeyError.
        return fw

    def check_cache(self):
        """Close expired entries and drop the oldest one when at capacity."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # Iterate over a copy because entries are deleted in the loop.
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._earliest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # NOTE(review): the dropped connection is not closed
                    # here, unlike the expiry case above -- confirm whether
                    # that is intended.
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._earliest_expiry()

    def _earliest_expiry(self):
        """Return the smallest expiry time, or 0 when nothing is cached."""
        # min() raises ValueError on an empty sequence, which used to
        # happen as soon as the last entry had been removed.
        if not self.timeout:
            return 0
        return min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001254
class GopherHandler(BaseHandler):
    """Open gopher: requests through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) and wrap the reply.

        Raises GopherError when the request has no host.
        """
        raw_host = req.get_host()
        if not raw_host:
            raise GopherError('no host given')
        host = unquote(raw_host)

        # The gopher type split off the selector is not used further.
        _gtype, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001271
1272#bleck! don't use this yet
1273class OpenerFactory:
1274
1275 default_handlers = [UnknownHandler, HTTPHandler,
Tim Peterse1190062001-01-15 03:34:38 +00001276 HTTPDefaultErrorHandler, HTTPRedirectHandler,
Fred Drake13a2c272000-02-10 17:17:14 +00001277 FTPHandler, FileHandler]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001278 handlers = []
1279 replacement_handlers = []
1280
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001281 def add_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001282 self.handlers = self.handlers + [h]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001283
1284 def replace_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001285 pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001286
1287 def build_opener(self):
Jeremy Hylton54e99e82001-08-07 21:12:25 +00001288 opener = OpenerDirector()
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +00001289 for ph in self.default_handlers:
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001290 if inspect.isclass(ph):
Fred Drake13a2c272000-02-10 17:17:14 +00001291 ph = ph()
1292 opener.add_handler(ph)