blob: 644b3802d067fbb3ab17306eb5076165978c1515 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the URL and, optionally, data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It installs the default handlers and accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform authentication
# but fails for some reason, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows that the problem was, e.g., that it didn't know
# the hash algo that was requested in the challenge, it would be good to
# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000090import base64
91import ftplib
92import gopherlib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000094import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000095import md5
96import mimetypes
97import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import os
99import posixpath
100import random
101import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000102import sha
103import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104import sys
105import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000106import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000107import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000108import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000109
110try:
111 from cStringIO import StringIO
112except ImportError:
113 from StringIO import StringIO
114
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000115# not sure how many of these need to be gotten rid of
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000116from urllib import (unwrap, unquote, splittype, splithost,
117 addinfourl, splitport, splitgophertype, splitquery,
118 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000119
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000120# support for FileHandler, proxies via environment variables
121from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000122
Jeremy Hylton023518a2003-12-17 18:52:16 +0000123__version__ = "2.4"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000124
# Module-wide default opener; created lazily by urlopen() or set
# explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url with the module-wide default opener.

    url and data are passed unchanged to the opener's open() method and
    its return value is returned.  If no opener has been installed yet,
    one is created with build_opener() and remembered for later calls.
    """
    global _opener
    if _opener is None:
        _opener = build_opener()
    return _opener.open(url, data)
131
def install_opener(opener):
    """Make opener the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
135
136# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000137# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000138# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000139
class URLError(IOError):
    """Error raised while opening a URL.

    reason -- the value describing the failure; kept as self.reason and
    interpolated into the string form of the exception.
    """
    # URLError is a sub-type of IOError, but it doesn't share any of
    # the implementation.  need to override __init__ and __str__.
    # It sets self.args for compatibility with other EnvironmentError
    # subclasses, but args doesn't have the typical format with errno in
    # slot 0 and strerror in slot 1.  This may be better than nothing.
    def __init__(self, reason):
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000152
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    Attributes set by the constructor:
    code     -- the code value passed in
    msg      -- the message passed in
    hdrs     -- the headers object passed in
    fp       -- the file object passed in (may be None)
    filename -- the url passed in
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000172
class GopherError(URLError):
    """URLError subclass for gopher failures; adds no behaviour of its own."""
    pass
175
Moshe Zadka8a18e992001-03-01 08:40:42 +0000176
class Request:
    """Encapsulates the state of a single URL request.

    url             -- the URL; a '<URL:...>' wrapper is removed by unwrap()
    data            -- optional request data; when present get_method()
                       reports "POST", otherwise "GET"
    headers         -- optional mapping of header names to values; each
                       entry is stored through add_header()
    origin_req_host -- defaults to cookielib.request_host(self)
    unverifiable    -- stored as given and returned by is_unverifiable()

    Header names are stored in str.capitalize() form (e.g.
    'Content-type').  has_header() and get_header() compare the name
    exactly as given, so callers must pass that capitalized form.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # headers defaults to None rather than a shared {} literal.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if attr.startswith(prefix):
            getter = 'get_' + attr[len(prefix):]
            if hasattr(Request, getter):
                getattr(self, getter)()
                # Only hand the value back if the getter really stored
                # it; looking it up via getattr() again would re-enter
                # this method and recurse without bound.
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" if the request carries data, else "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        """Return the URL type, splitting it off the URL on first use.

        Raises ValueError if splittype() finds no type in the URL.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host, splitting it off on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        return self.__r_host

    def set_proxy(self, host, type):
        """Send the request to host using type; the selector becomes
        the original full URL."""
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return true if header_name is in either header dictionary."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Look header_name up in the regular headers first, then in the
        unredirected ones; return default if it is in neither."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs of both header dictionaries; a
        regular header overrides an unredirected one of the same name."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000277
class OpenerDirector:
    """Dispatches requests to the handlers registered with add_handler().

    Handlers are indexed by the names of their methods:
      <protocol>_open            -> self.handle_open[protocol]
      <protocol>_error_<kind>    -> self.handle_error[protocol][kind]
      <protocol>_request         -> self.process_request[protocol]
      <protocol>_response        -> self.process_response[protocol]
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every method name that fits the
        naming scheme described in the class docstring.

        A handler with no such method is ignored.  Otherwise it is
        inserted in sorted order into self.handlers and its
        add_parent() is called with this director.
        """
        added = False
        for meth in dir(handler):
            i = meth.find("_")
            if i < 0:
                # No underscore: the name cannot be <protocol>_<condition>.
                # Slicing with i == -1 would turn a method called 'open'
                # into protocol 'ope' with condition 'open'.
                continue
            protocol = meth[:i]
            condition = meth[i+1:]
            # NOTE(review): helper names such as redirect_request or
            # proxy_open also fit this pattern and get registered under
            # the pseudo-protocols 'redirect' / 'proxy'.

            if condition.startswith("error"):
                # kind is the part after "<protocol>_error_"; numeric
                # kinds (HTTP status codes) are stored as ints.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition in ("response", "request"):
                kind = protocol
                lookup = getattr(self, "process_" + condition)
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind], in
        order, and return the first result that is not None.

        Returns None when no handler produces a result.
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request object).

        The request is passed through the <protocol>_request
        processors, opened with _open(), and the response is passed
        through the <protocol>_response processors before being
        returned.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default' open handlers, then those for the request's
        protocol, then the 'unknown' ones; return the first true result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https' the third positional argument (args[2])
        is used as the error kind; if no http_error_<kind> handler
        returns a true result, the http_error_default chain is tried.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000403
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000404# XXX probably also want an abstract factory that knows when it makes
405# sense to skip a superclass in favor of a subclass and when it might
406# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000407
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated without arguments.  Returns the OpenerDirector.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)
    skip = []
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                overridden = issubclass(check, klass)
            else:
                overridden = isinstance(check, klass)
            if overridden:
                # Record each default at most once: two arguments
                # derived from the same default would otherwise make
                # the second remove() below raise ValueError.
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
443
class BaseHandler:
    """Base class for handlers registered with an OpenerDirector.

    handler_order determines the sort position of a handler: a handler
    compares less than another one with a larger handler_order.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember parent as self.parent."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other.handler_order
Tim Petersf545baa2003-06-15 23:26:30 +0000461
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000462
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return response unchanged for any 2xx code; otherwise return
        whatever the parent's error() dispatch produces."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # Every 2xx status is a success, not only 200 and 206; codes
        # such as 201 or 204 must not be routed to the error handlers.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
477
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000481
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            return Request(newurl,
                           headers=req.headers,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the 'location' (or 'uri') header.

        Returns None when neither header is present or when
        redirect_request() declines; otherwise returns the result of
        opening the new request through the parent.  Raises HTTPError
        when the target is not an http, https or ftp URL, or when the
        redirect limits (max_repeats, max_redirections) are reached.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # The target comes from the server and is untrusted: only
        # follow it to http, https or ftp, so that a response cannot
        # steer the opener to other schemes such as file://.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed"
                            % newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000561
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given as a mapping from URL
    type to proxy URL.

    With proxies=None the mapping is obtained from getproxies().  For
    every entry a '<type>_open' method is attached to the instance that
    forwards to proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Bind the loop values as defaults so that each lambda keeps
            # its own proxy URL and type.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy.

        If the proxy URL carries 'user:password@' credentials, a
        'Proxy-authorization: Basic ...' header is added to req.
        Returns None when the proxy uses the same type as the request
        (so later handlers open it), otherwise re-opens the request
        through the parent.
        """
        orig_type = req.get_type()
        # The type argument is not used; the proxy's own URL type is.
        proxy_type, r_type = splittype(proxy)
        host, _rest = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                # b64encode() adds no newlines; encodestring() ends its
                # output with '\n', which would leak into the header
                # value.
                creds = base64.b64encode('%s:%s' % (unquote(user),
                                                    unquote(password)))
                req.add_header('Proxy-authorization', 'Basic ' + creds)
        host = unquote(host)
        req.set_proxy(host, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000598
599# feature suggested by Duncan Booth
600# XXX custom is not a good name
class CustomProxy:
    """Pairs a proxy address with a predicate deciding when to use it.

    proto      -- stored as self.proto
    func       -- optional callable taking a request; a true result
                  means this proxy applies
    proxy_addr -- value returned by get_proxy()
    """
    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return 1 if func is set and func(req) is true, else None."""
        if self.func and self.func(req):
            return 1

    def get_proxy(self):
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000614
class CustomProxyHandler(BaseHandler):
    """Chooses a proxy per request from registered CustomProxy objects.

    self.proxies maps a protocol name to the list of proxy objects
    registered for it, in registration order.
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        self.proxies = {}
        # Register the objects passed to the constructor instead of
        # silently dropping them.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Re-open req through the first registered proxy whose
        handle(req) is true; return None if there is none."""
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Request.set_proxy() takes (host, type); the original
                # one-argument call could only raise TypeError.
                # NOTE(review): keeps the request's own type and treats
                # get_proxy() as the host -- confirm intended format.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Append cpo to the list kept for cpo.proto."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000642
class HTTPPasswordMgr:
    """Stores (user, password) pairs per realm and URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, passwd)}.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm and uri."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        self.passwd.setdefault(realm, {})[uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for realm whose URI
        equals or contains authuri, or (None, None) if there is none."""
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain character prefix would
        # wrongly place '/foobar' below '/foo'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000686
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000687
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the entries stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up realm first; when no user is found, retry with realm None."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing for the specific realm: consult the default realm.
            return lookup(self, None, authuri)
        return user, password
696
697
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` for the retry.
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use password_mgr for lookups, or a new HTTPPasswordMgr if None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq names the challenge header to read from headers.  Returns
        the retry's result, or None when the header is missing, does not
        match the expected pattern, or names a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Add Basic credentials for (realm, host) to req and reopen it.

        Returns None when no password is known, or when req already carries
        exactly these credentials (so the same failure is not repeated).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        # encodestring() splits its output into lines; strip() only removed
        # the trailing newline, leaving embedded ones for long credentials.
        # A header value must not contain them, so drop every newline.
        encoded = base64.encodestring(raw).replace('\n', '')
        auth = 'Basic %s' % encoded
        if req.headers.get(self.auth_header, None) == auth:
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
733
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Look up credentials by the request URL's netloc and retry."""
        url_parts = urlparse.urlparse(req.get_full_url())
        netloc = url_parts[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
742
743
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Basic proxy credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Look up credentials by the request's host and retry."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
752
753
def randombytes(n):
    """Return n random bytes.

    Reads from /dev/urandom when that path exists and falls back to the
    random module otherwise.
    """
    # It might be worthwhile to extend this function to use
    # other platform-specific mechanisms for getting random bytes.
    if os.path.exists("/dev/urandom"):
        # The device yields raw bytes, so open it in binary mode, and make
        # sure the handle is released even if the read fails.
        f = open("/dev/urandom", "rb")
        try:
            return f.read(n)
        finally:
            f.close()
    return "".join([chr(random.randrange(0, 256)) for i in range(n)])
767
class AbstractDigestAuthHandler:
    """Shared logic for answering Digest authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries the
    credentials) and are expected to provide ``parent`` for the retry.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" is not supported

    def __init__(self, passwd=None):
        """Use passwd for lookups, or a new HTTPPasswordMgr if None."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req with Digest credentials built from the challenge.

        Raises HTTPError once more than five attempts were counted since
        the last reset, and ValueError when the challenge names a scheme
        other than Digest.  Returns None when the challenge header is absent.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Parse the challenge text auth, add credentials to req, reopen it.

        Returns None when no authorization could be built or when req
        already carries the identical header value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value that follows 'Digest ' in the credentials header.

        chal is the parsed challenge (a dict).  Returns None when the
        challenge lacks realm or nonce, names an algorithm or qop this
        class cannot handle, or when no user is known for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [opt.strip() for opt in qop.split(',')]:
            # The challenge may offer several comma-separated qop options;
            # 'auth' is the one implemented here, so answer with it.
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.  Decline instead of running on with
            # respdig, ncvalue and cnonce unassigned.
            return None

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        if qop:
            base = base + ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) callables for algorithm, or (None, None) if unknown.

        H hashes a string to a hex digest; KD(secret, data) is
        H("secret:data").
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # XXX MD5-sess
            # The caller tests "H is None", so report unknown algorithms
            # that way rather than failing on an unassigned H.
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000902
Moshe Zadka8a18e992001-03-01 08:40:42 +0000903
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 401 responses with HTTP Digest authentication.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer the challenge, then reset the retry counter."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        result = self.http_error_auth_reqed('www-authenticate',
                                            netloc, req, headers)
        # Reached only when the attempt above did not raise.
        self.reset_retry_count()
        return result
Moshe Zadka8a18e992001-03-01 08:40:42 +0000919
920
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 407 responses with HTTP Digest authentication."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer the proxy's challenge, then reset the retry counter."""
        proxy_host = req.get_host()
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            proxy_host, req, headers)
        # Reached only when the attempt above did not raise.
        self.reset_retry_count()
        return result
Tim Peterse1190062001-01-15 03:34:38 +0000931
Moshe Zadka8a18e992001-03-01 08:40:42 +0000932class AbstractHTTPHandler(BaseHandler):
933
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000934 def __init__(self, debuglevel=0):
935 self._debuglevel = debuglevel
936
937 def set_http_debuglevel(self, level):
938 self._debuglevel = level
939
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000940 def do_request_(self, request):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000941 host = request.get_host()
942 if not host:
943 raise URLError('no host given')
944
945 if request.has_data(): # POST
946 data = request.get_data()
947 if not request.has_header('Content-type'):
948 request.add_unredirected_header(
949 'Content-type',
950 'application/x-www-form-urlencoded')
951 if not request.has_header('Content-length'):
952 request.add_unredirected_header(
953 'Content-length', '%d' % len(data))
954
955 scheme, sel = splittype(request.get_selector())
956 sel_host, sel_path = splithost(sel)
957 if not request.has_header('Host'):
958 request.add_unredirected_header('Host', sel_host or host)
959 for name, value in self.parent.addheaders:
960 name = name.capitalize()
961 if not request.has_header(name):
962 request.add_unredirected_header(name, value)
963
964 return request
965
Moshe Zadka8a18e992001-03-01 08:40:42 +0000966 def do_open(self, http_class, req):
Jeremy Hylton023518a2003-12-17 18:52:16 +0000967 """Return an addinfourl object for the request, using http_class.
968
969 http_class must implement the HTTPConnection API from httplib.
970 The addinfourl return value is a file-like object. It also
971 has methods and attributes including:
972 - info(): return a mimetools.Message object for the headers
973 - geturl(): return the original request URL
974 - code: HTTP status code
975 """
Moshe Zadka76676802001-04-11 07:44:53 +0000976 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000977 if not host:
978 raise URLError('no host given')
979
Jeremy Hylton828023b2003-05-04 23:44:49 +0000980 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000981 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +0000982
Jeremy Hylton023518a2003-12-17 18:52:16 +0000983 headers = dict(req.headers)
984 headers.update(req.unredirected_hdrs)
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000985 # We want to make an HTTP/1.1 request, but the addinfourl
986 # class isn't prepared to deal with a persistent connection.
987 # It will try to read all remaining data from the socket,
988 # which will block while the server waits for the next request.
989 # So make sure the connection gets closed after the (only)
990 # request.
991 headers["Connection"] = "close"
Jeremy Hylton828023b2003-05-04 23:44:49 +0000992 try:
Jeremy Hylton023518a2003-12-17 18:52:16 +0000993 h.request(req.get_method(), req.get_selector(), req.data, headers)
994 r = h.getresponse()
995 except socket.error, err: # XXX what error?
Jeremy Hylton828023b2003-05-04 23:44:49 +0000996 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000997
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +0000998 # Pick apart the HTTPResponse object to get the addinfourl
Jeremy Hylton5d9c3032004-08-07 17:40:50 +0000999 # object initialized properly.
1000
1001 # Wrap the HTTPResponse object in socket's file object adapter
1002 # for Windows. That adapter calls recv(), so delegate recv()
1003 # to read(). This weird wrapping allows the returned object to
1004 # have readline() and readlines() methods.
Tim Peters9ca3f852004-08-08 01:05:14 +00001005
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001006 # XXX It might be better to extract the read buffering code
1007 # out of socket._fileobject() and into a base class.
Tim Peters9ca3f852004-08-08 01:05:14 +00001008
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001009 r.recv = r.read
1010 fp = socket._fileobject(r)
Tim Peters9ca3f852004-08-08 01:05:14 +00001011
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001012 resp = addinfourl(fp, r.msg, req.get_full_url())
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001013 resp.code = r.status
1014 resp.msg = r.reason
1015 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001016
Moshe Zadka8a18e992001-03-01 08:40:42 +00001017
class HTTPHandler(AbstractHTTPHandler):
    """Open http URLs over httplib.HTTPConnection."""

    # Request pre-processing is the shared implementation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001024
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https URLs over httplib.HTTPSConnection."""

        # Request pre-processing is the shared implementation.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1032
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use the given cookiejar, or a new cookielib.CookieJar if None."""
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = cookielib.CookieJar()

    def http_request(self, request):
        """Let the jar add its cookie header to request; return request."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response; return response."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001049
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming the request's URL type."""

    def unknown_open(self, req):
        url_type = req.get_type()
        message = 'unknown url type: %s' % url_type
        raise URLError(message)
1054
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    enclosing double quotes removed from the value when present.  An
    element without '=' raises ValueError (from the tuple unpacking).
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes: an empty value ("key=") must not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1064
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.

    Returns a list of the elements with surrounding whitespace stripped.
    Quotes and backslashes are kept verbatim in the elements.  Inside a
    quoted-string a backslash protects the next character, so an escaped
    double quote does not end the string.  An unterminated quoted-string
    runs to the end of the input.  A trailing comma does not produce a
    final empty element.
    """
    # The earlier implementation did not advance its start index once no
    # quotes remained, so 'a, b, c' came back as ['a', 'a, b', 'a, b, c'].
    # Scanning character by character avoids that index bookkeeping.
    items = []
    current = []
    in_quote = False
    escaped = False
    for ch in s:
        if escaped:
            # Character protected by the preceding backslash.
            escaped = False
        elif in_quote:
            if ch == '\\':
                escaped = True
            elif ch == '"':
                in_quote = False
        elif ch == '"':
            in_quote = True
        elif ch == ',':
            # Separator outside any quoted-string: close the element.
            items.append(''.join(current))
            current = []
            continue
        current.append(ch)
    if current:
        items.append(''.join(current))
    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001110
class FileHandler(BaseHandler):
    """Open file: URLs, from local disk or by handing them on as ftp."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        selector = req.get_selector()
        # "//host/..." names another host; "///path" is still local.
        names_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses regarded as local, resolving them once."""
        if FileHandler.names is None:
            resolve = socket.gethostbyname
            FileHandler.names = (resolve('localhost'),
                                 resolve(socket.gethostname()))
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        Raises URLError when the URL names a host that is not local.
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # port is only assigned above; the "not host" test short-circuits
        # before it is read when there was no host.
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+selector)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001149
1150class FTPHandler(BaseHandler):
1151 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001152 host = req.get_host()
1153 if not host:
1154 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001155 host, port = splitport(host)
1156 if port is None:
1157 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001158 else:
1159 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001160
1161 # username/password handling
1162 user, host = splituser(host)
1163 if user:
1164 user, passwd = splitpasswd(user)
1165 else:
1166 passwd = None
1167 host = unquote(host)
1168 user = unquote(user or '')
1169 passwd = unquote(passwd or '')
1170
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001171 try:
1172 host = socket.gethostbyname(host)
1173 except socket.error, msg:
1174 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001175 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001176 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001177 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001178 dirs, file = dirs[:-1], dirs[-1]
1179 if dirs and not dirs[0]:
1180 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001181 try:
1182 fw = self.connect_ftp(user, passwd, host, port, dirs)
1183 type = file and 'I' or 'D'
1184 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001185 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001186 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001187 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001188 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001189 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001190 headers = ""
1191 mtype = mimetypes.guess_type(req.get_full_url())[0]
1192 if mtype:
Brett Cannon783eaf42003-06-17 21:52:34 +00001193 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001194 if retrlen is not None and retrlen >= 0:
Brett Cannon783eaf42003-06-17 21:52:34 +00001195 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001196 sf = StringIO(headers)
1197 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001198 return addinfourl(fp, headers, req.get_full_url())
1199 except ftplib.all_errors, msg:
1200 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001201
1202 def connect_ftp(self, user, passwd, host, port, dirs):
1203 fw = ftpwrapper(user, passwd, host, port, dirs)
1204## fw.ftp.set_debuglevel(1)
1205 return fw
1206
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> expiry time (seconds since the epoch)
        self.soonest = 0     # earliest expiry time in self.timeout
        self.delay = 60      # seconds added to "now" on every use
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the number of seconds a connection stays cached after use."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached ftpwrapper for the target, creating it if needed."""
        key = user, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        # Take the wrapper before pruning: check_cache() can evict this
        # very entry (max_conns of 1, or a non-positive delay), and
        # looking it up afterwards would raise KeyError.
        fw = self.cache[key]
        self.check_cache()
        return fw

    def check_cache(self):
        """Close expired connections and evict one entry when at capacity."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # Iterate over a copy; entries are deleted inside the loop.
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute self.soonest; an empty table resets it to 0."""
        # min() of an empty sequence raises ValueError, which used to
        # happen whenever every entry had been removed.
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001252
class GopherHandler(BaseHandler):
    """Open gopher URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) and wrap the reply.

        Raises GopherError when the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        gopher_type, remainder = splitgophertype(req.get_selector())
        selector, query = splitquery(remainder)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            query = unquote(query)
            fp = gopherlib.send_query(selector, query, host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001269
1270#bleck! don't use this yet
1271class OpenerFactory:
1272
1273 default_handlers = [UnknownHandler, HTTPHandler,
Tim Peterse1190062001-01-15 03:34:38 +00001274 HTTPDefaultErrorHandler, HTTPRedirectHandler,
Fred Drake13a2c272000-02-10 17:17:14 +00001275 FTPHandler, FileHandler]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001276 handlers = []
1277 replacement_handlers = []
1278
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001279 def add_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001280 self.handlers = self.handlers + [h]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001281
1282 def replace_handler(self, h):
Fred Drake13a2c272000-02-10 17:17:14 +00001283 pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001284
1285 def build_opener(self):
Jeremy Hylton54e99e82001-08-07 21:12:25 +00001286 opener = OpenerDirector()
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +00001287 for ph in self.default_handlers:
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001288 if inspect.isclass(ph):
Fred Drake13a2c272000-02-10 17:17:14 +00001289 ph = ph()
1290 opener.add_handler(ph)