blob: ec01c8fd93fe2636474bc31d752565d97f3c9a8d [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
Georg Brandlc5ffd912006-04-02 20:48:11 +000017urlopen(url, data=None) -- basic usage is the same as original
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000018urllib. pass the url and optionally data to post to an HTTP URL, and
Tim Peterse1190062001-01-15 03:34:38 +000019get a file-like object back. One difference is that you can also pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000020a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
27instantiate. if one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
35Request -- an object that encapsulates the state of a request. the
36state can be as simple as the URL. it can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
74# If an authentication error handler that tries to perform
Fred Draked5214b02001-11-08 17:19:29 +000075# authentication for some reason but fails, how should the error be
76# signalled? The client needs to know the HTTP error code. But if
77# the handler knows that the problem was, e.g., that it didn't know
78# the hash algo that was requested in the challenge, it would be good to
79# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080# ftp errors aren't handled cleanly
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000081# check digest against correct (i.e. non-apache) implementation
82
Georg Brandlc5ffd912006-04-02 20:48:11 +000083# Possible extensions:
84# complex proxies XXX not sure what exactly was meant by this
85# abstract factory for opener
86
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000087import base64
88import ftplib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000089import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000090import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000091import md5
92import mimetypes
93import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000094import os
95import posixpath
96import random
97import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import sha
99import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000100import sys
101import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000102import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000103import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000104import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105
106try:
107 from cStringIO import StringIO
108except ImportError:
109 from StringIO import StringIO
110
Georg Brandl7fff58c2006-04-02 21:13:13 +0000111from urllib import (unwrap, unquote, splittype, splithost, quote,
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000112 addinfourl, splitport, splitgophertype, splitquery,
113 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000114
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000115# support for FileHandler, proxies via environment variables
116from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000117
# Used in the User-Agent header sent with each request.  Built from
# sys.version_info rather than by slicing sys.version, because slicing the
# first three characters truncates a two-digit minor version
# (e.g. "3.10" would become "3.1").
__version__ = "%d.%d" % sys.version_info[:2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000120
# Module-wide opener shared by urlopen(); created lazily on first use or
# replaced explicitly through install_opener().
_opener = None


def urlopen(url, data=None):
    """Open *url* with the module-wide opener and return its result.

    *url* and *data* are passed straight through to the opener's open()
    method.  If no opener has been installed yet, a default one is built
    with build_opener() and remembered for later calls.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
127
def install_opener(opener):
    """Make *opener* the module-wide opener used by urlopen()."""
    global _opener
    _opener = opener
131
132# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000133# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000134# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000135
class URLError(IOError):
    """Error raised while opening a URL.

    This is a sub-type of IOError but shares none of its implementation:
    both __init__ and __str__ are overridden.  ``args`` is still set for
    compatibility with other EnvironmentError subclasses, although it
    holds only the reason instead of the usual (errno, strerror) pair.
    """

    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000148
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return

    Attributes set by the constructor:
      code     -- the HTTP status code
      msg      -- the message that accompanied the status code
      hdrs     -- the response headers
      fp       -- the response body file object, or None
      filename -- the URL that was requested
      reason   -- read-only alias for msg (see below)
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    # URLError.__init__ is never called here, so without this property an
    # HTTPError would have no ``reason`` attribute even though it is a
    # URLError; code that handles URLError generically and reads
    # ``e.reason`` would fail with AttributeError.
    @property
    def reason(self):
        """The HTTP status message, mirroring URLError.reason."""
        return self.msg
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000168
class GopherError(URLError):
    """URLError subclass for the gopher protocol; adds no behaviour."""
171
Moshe Zadka8a18e992001-03-01 08:40:42 +0000172
class Request:
    """State of a single URL request: the URL plus optional data and headers.

    Arguments:
      url             -- the URL; a '<URL:...>' wrapper is stripped by unwrap()
      data            -- optional request data; when present get_method()
                         reports "POST" instead of "GET"
      headers         -- optional mapping of header name to value; each
                         entry is stored through add_header()
      origin_req_host -- host of the originating request; computed with
                         cookielib.request_host() when omitted
      unverifiable    -- stored and reported by is_unverifiable()

    Header names are stored capitalize()d; has_header() and get_header()
    look the name up exactly as given, so pass it in that stored form
    (e.g. 'Content-type').
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # headers defaults to None rather than a shared mutable {}.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        # Must exist before request_host() is called below, because that
        # helper may consult the headers of this object.
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        #
        # A missing private '__r_<name>' attribute is filled in by calling
        # the matching get_<name>() method and then looked up again.
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" if the request carries data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        """Return the URL given to the constructor (after unwrap())."""
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError if the URL has none."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what follows the host, or the full URL after set_proxy()."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Redirect the request to a proxy *host* using scheme *type*.

        The selector becomes the original full URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers win over unredirected ones."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs of all headers.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000273
class OpenerDirector:
    """Composite object that opens URLs by delegating to handler objects.

    Handlers are registered with add_handler(), which inspects their
    method names and files them in one of four tables:

      handle_open[scheme]            -- <scheme>_open methods
      handle_error[scheme][kind]     -- <scheme>_error_<kind> methods
      process_request[scheme]        -- <scheme>_request methods
      process_response[scheme]       -- <scheme>_response methods
    """

    # Helper methods whose names merely look like '<scheme>_open' or
    # '<scheme>_request'.  They are not scheme handlers and take different
    # arguments, so add_handler() must not register them.  ('do_open' is
    # presumably defined by the HTTP handlers later in this module -- it
    # is not visible in this part of the file.)
    _not_handler_methods = ("redirect_request", "do_open", "proxy_open")

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* for every protocol method it provides.

        A handler that provides no recognised method is ignored: it is
        neither stored in self.handlers nor given a parent.
        """
        added = False
        for meth in dir(handler):
            if meth in self._not_handler_methods:
                # coincidental name match, not a scheme handler
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # '<protocol>_error_<kind>': j is the index (in meth) of
                # the underscore that follows "error".
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    # non-numeric kinds such as 'default' stay strings
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each chain ordered by the handlers' own comparison.
            bisect.insort(lookup.setdefault(kind, []), handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] in order.

        Handlers raise an exception if no one else should try to handle
        the request, or return None if they can't but another handler
        could.  Otherwise, they return the response, which is returned
        here.  Returns None if no handler produced a result.
        """
        for handler in chain.get(kind, ()):
            result = getattr(handler, meth_name)(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request is passed through the <scheme>_request processors,
        opened by _open(), and the response is passed through the
        <scheme>_response processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try default_open, then <scheme>_open, then unknown_open handlers."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https' the third positional argument is the HTTP
        status code; the http_error_<code> handlers are tried first and,
        if none returns a result, the http_error_default handlers.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            chain = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (chain, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (chain, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000403
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000404# XXX probably also want an abstract factory that knows when it makes
405# sense to skip a superclass in favor of a subclass and when it might
406# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000407
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    Each argument may be a handler instance or a handler class (which
    is instantiated without arguments).  If any of the handlers passed
    as arguments are subclasses of the default handlers, the default
    handlers will not be used.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    def _is_overridden(klass):
        # True if any supplied handler is klass or derives from it.
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    return True
            elif isinstance(check, klass):
                return True
        return False

    # Each default class is tested once, so a default overridden by
    # several supplied handlers is simply left out.  (Collecting matches
    # in a list and calling list.remove() for each one raised ValueError
    # on the second removal of the same class.)
    for klass in default_classes:
        if not _is_overridden(klass):
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
443
class BaseHandler:
    """Common base for handlers.

    handler_order decides where a handler is placed relative to others
    (lower values sort first); see __lt__.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler has been added to."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000461
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000462
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses.

    Responses with a 2xx status are returned unchanged; every other
    response is handed to the parent's error() dispatch, whose result
    (or exception) replaces the response.
    """
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, every "2xx" code indicates that the
        # client's request was successfully received, understood, and
        # accepted -- not only 200 and 206.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
477
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback error handler: turn an HTTP error response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000481
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # be conciliant with URIs containing a space
            newurl = newurl.replace(' ', '%20')
            return Request(newurl,
                           headers=req.headers,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by opening the redirect target.

        Returns None when the response names no target or when
        redirect_request() declines.  Raises HTTPError when the target
        uses a scheme other than http, https or ftp, or when a redirect
        loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # The target comes from the server and is untrusted.  Only follow
        # it to http, https and ftp URLs, so that a remote server cannot
        # steer the opener to other schemes such as file:.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg +
                            " - Redirection to url '%s' is not allowed" %
                            newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000563
Georg Brandl720096a2006-04-02 20:45:34 +0000564
565def _parse_proxy(proxy):
566 """Return (scheme, user, password, host/port) given a URL or an authority.
567
568 If a URL is supplied, it must have an authority (host:port) component.
569 According to RFC 3986, having an authority component means the URL must
570 have two slashes after the scheme:
571
572 >>> _parse_proxy('file:/ftp.example.com/')
573 Traceback (most recent call last):
574 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
575
576 The first three items of the returned tuple may be None.
577
578 Examples of authority parsing:
579
580 >>> _parse_proxy('proxy.example.com')
581 (None, None, None, 'proxy.example.com')
582 >>> _parse_proxy('proxy.example.com:3128')
583 (None, None, None, 'proxy.example.com:3128')
584
585 The authority component may optionally include userinfo (assumed to be
586 username:password):
587
588 >>> _parse_proxy('joe:password@proxy.example.com')
589 (None, 'joe', 'password', 'proxy.example.com')
590 >>> _parse_proxy('joe:password@proxy.example.com:3128')
591 (None, 'joe', 'password', 'proxy.example.com:3128')
592
593 Same examples, but with URLs instead:
594
595 >>> _parse_proxy('http://proxy.example.com/')
596 ('http', None, None, 'proxy.example.com')
597 >>> _parse_proxy('http://proxy.example.com:3128/')
598 ('http', None, None, 'proxy.example.com:3128')
599 >>> _parse_proxy('http://joe:password@proxy.example.com/')
600 ('http', 'joe', 'password', 'proxy.example.com')
601 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
602 ('http', 'joe', 'password', 'proxy.example.com:3128')
603
604 Everything after the authority is ignored:
605
606 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
607 ('ftp', 'joe', 'password', 'proxy.example.com')
608
609 Test for no trailing '/' case:
610
611 >>> _parse_proxy('http://joe:password@proxy.example.com')
612 ('http', 'joe', 'password', 'proxy.example.com')
613
614 """
615 from urlparse import _splitnetloc
616 scheme, r_scheme = splittype(proxy)
617 if not r_scheme.startswith("/"):
618 # authority
619 scheme = None
620 authority = proxy
621 else:
622 # URL
623 if not r_scheme.startswith("//"):
624 raise ValueError("proxy URL with no authority: %r" % proxy)
625 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
626 # and 3.3.), path is empty or starts with '/'
627 end = r_scheme.find("/", 2)
628 if end == -1:
629 end = None
630 authority = r_scheme[2:end]
631 userinfo, hostport = splituser(authority)
632 if userinfo is not None:
633 user, password = splitpasswd(userinfo)
634 else:
635 user = password = None
636 return scheme, user, password, hostport
637
class ProxyHandler(BaseHandler):
    """Route requests through proxies.

    *proxies* maps a URL scheme to a proxy URL (or bare authority); when
    omitted it is obtained from getproxies().  For each scheme in the
    mapping the instance gets a '<scheme>_open' attribute that forwards
    to proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Default arguments bind the current url/scheme to each
            # lambda; a plain closure would see only the last pair.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy* and decide who opens it.

        Returns None (so other handlers take over) when the proxy uses
        the same scheme as the request; otherwise re-opens the modified
        request through the parent.
        """
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (unquote(user), unquote(password))
            # b64encode, unlike encodestring, never inserts line breaks,
            # so long credentials cannot put a newline inside the header.
            creds = base64.b64encode(user_pass)
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000674
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI prefix.

    self.passwd maps realm -> {tuple of reduced URIs: (user, password)}.
    A "reduced" URI is the (netloc, path) pair produced by reduce_uri().
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for `realm` under one or more URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        if not realm in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for `authuri` in `realm`.

        Returns (None, None) when no registered URI covers `authuri`.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            # No netloc was parsed, so the path component holds the host.
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.  The comparison is done on
        whole path segments: '/foo' covers '/foo' and '/foo/bar' but not
        '/foobar' (a plain character-wise common prefix would accept it).
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000718
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000719
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to credentials stored under realm None."""

    def find_user_password(self, realm, authuri):
        """Look up `authuri` in `realm` first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm: try the default one.
            return lookup(self, None, authuri)
        return user, password
728
729
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Subclasses provide `auth_header` (the request header to fill in) and are
    expected to supply `parent`, the opener used to retry the request.
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use `password_mgr` for lookups, or a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry `req` with credentials if `headers` carry a Basic challenge.

        `authreq` names the challenge header to read.  Returns the retried
        response, or None when there is no usable Basic challenge.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue `req` with a Basic credentials header for `realm`.

        Returns None when no password is known, or when the same header was
        already sent (retrying would only fail again).
        """
        # TODO(jhylton): Remove the host argument? It depends on whether
        # retry_http_basic_auth() is consider part of the public API.
        # It probably is.
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            # b64encode yields a single line; encodestring wraps every 76
            # characters and so corrupted the header for long credentials.
            auth = 'Basic %s' % base64.b64encode(raw)
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
768
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials for the origin server."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by retrying with the 'www-authenticate' challenge."""
        full_url = req.get_full_url()
        # Index 1 of the parsed URL is the network location.
        host = urlparse.urlparse(full_url)[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          host, req, headers)
777
778
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by retrying with the 'proxy-authenticate' challenge."""
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
787
788
def randombytes(n):
    """Return n random bytes."""
    # os.urandom reads the platform's randomness source in binary mode and
    # releases it itself, so no file handle can leak and no text-mode
    # translation can alter the data.  Fall back to the random module
    # when the platform provides no such source.
    try:
        return os.urandom(n)
    except NotImplementedError:
        L = [chr(random.randrange(0, 256)) for i in range(n)]
        return "".join(L)
802
class AbstractDigestAuthHandler:
    """Shared logic for answering Digest authentication challenges.

    Subclasses provide `auth_header` (the request header to fill in) and are
    expected to supply `parent`, the opener used to retry the request.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use `passwd` as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget how many authentication attempts have been made."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry `req` if `headers` carry a Digest challenge.

        `auth_header` names the challenge header to read.  Raises HTTPError
        once more than five attempts have been counted, and ValueError when
        the challenge uses a scheme other than Digest.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Re-issue `req` with a Digest header answering challenge `auth`.

        Returns None when no header can be built or when the identical
        header was already sent.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hex client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value of the Digest credentials header.

        `chal` is the parsed challenge (a dict).  Returns None when the
        challenge lacks realm/nonce, names an unsupported algorithm, or no
        credentials are known.  Raises URLError for a qop other than 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            # Fail explicitly: falling through would hit unbound locals
            # (respdig, ncvalue, cnonce) further down.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for `algorithm`.

        The name is matched case-insensitively.  (None, None) is returned
        for an unsupported algorithm so the caller's `H is None` check works.
        """
        # lambdas assume digest modules are imported at the top level
        name = algorithm.upper()
        if name == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif name == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000936
Moshe Zadka8a18e992001-03-01 08:40:42 +0000937
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 using the 'www-authenticate' challenge."""
        # Index 1 of the parsed URL is the network location.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        # The attempt is over; start counting afresh for the next request.
        self.reset_retry_count()
        return response
Moshe Zadka8a18e992001-03-01 08:40:42 +0000953
954
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest credentials for the proxy."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 using the 'proxy-authenticate' challenge."""
        proxy_host = req.get_host()
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              proxy_host, req, headers)
        # The attempt is over; start counting afresh for the next request.
        self.reset_retry_count()
        return response
Tim Peterse1190062001-01-15 03:34:38 +0000965
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP-style handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level passed on to every connection."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in default headers on `request` and return it.

        Raises URLError when the request has no host.
        """
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            body = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(body))

        # The selector may itself be a full URL; in that case the Host
        # header is taken from it rather than from get_host().
        scheme, rest = splittype(request.get_selector())
        selector_host, selector_path = splithost(rest)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', selector_host or host)
        for header, value in self.parent.addheaders:
            header = header.capitalize()
            if not request.has_header(header):
                request.add_unredirected_header(header, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        connection = http_class(host) # will parse host:port
        connection.set_debuglevel(self._debuglevel)

        outgoing = dict(req.headers)
        outgoing.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        outgoing["Connection"] = "close"
        try:
            connection.request(req.get_method(), req.get_selector(),
                               req.data, outgoing)
            response = connection.getresponse()
        except socket.error: # XXX what error?
            # The active exception instance is wrapped in a URLError.
            raise URLError(sys.exc_info()[1])

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        response.recv = response.read
        fileobj = socket._fileobject(response)

        result = addinfourl(fileobj, response.msg, req.get_full_url())
        result.code = response.status
        result.msg = response.reason
        return result
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001050
Moshe Zadka8a18e992001-03-01 08:40:42 +00001051
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs with httplib.HTTPConnection."""

    # Requests are pre-processed by the shared header logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send `req` over a plain HTTP connection."""
        return self.do_open(httplib.HTTPConnection, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001058
# HTTPSHandler is only defined when httplib exposes the HTTPS attribute.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs with httplib.HTTPSConnection."""

        # Requests are pre-processed by the shared header logic.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Send `req` over an HTTPS connection."""
            return self.do_open(httplib.HTTPSConnection, req)
1066
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use `cookiejar`, or a new cookielib.CookieJar when None."""
        jar = cookiejar
        if jar is None:
            jar = cookielib.CookieJar()
        self.cookiejar = jar

    def http_request(self, request):
        """Let the jar add its Cookie header to `request`; return it."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from `response`; return it."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # https traffic is processed exactly like http traffic.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001083
class UnknownHandler(BaseHandler):
    """Handler that rejects requests passed to unknown_open()."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
1088
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict.  A value enclosed in double quotes has the quotes
    removed; an empty value (e.g. 'key=') maps to the empty string.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes: v may be empty and v[0] would raise.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1098
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits a comma-separated list whose elements may contain
    quoted-strings.  Commas inside double quotes do not separate
    elements, and a backslash inside double quotes escapes the next
    character (the backslash itself is dropped).  Quotes may appear in
    the middle of an otherwise unquoted element.  Only double-quotes
    count, not single-quotes.  Each element is returned stripped of
    surrounding whitespace.
    """
    elements = []
    current = []

    in_quotes = False
    escaped = False
    for ch in s:
        if escaped:
            # Character following a backslash: taken literally.
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001141
class FileHandler(BaseHandler):
    """Open file: URLs, handing host-qualified ones over as ftp."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Dispatch `req` to the ftp machinery or to open_local_file()."""
        selector = req.get_selector()
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        # '//host/...' form: retype the request and start over.
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the cached tuple of addresses that count as local."""
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            addresses = (socket.gethostbyname('localhost'),
                         socket.gethostbyname(socket.gethostname()))
        except socket.gaierror:
            addresses = (socket.gethostbyname('localhost'),)
        FileHandler.names = addresses
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by `req`.

        Raises URLError when the URL names a host that is not local.
        """
        import email.Utils
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(file)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # `port` is only bound when a host was given; the `not host` test
        # short-circuits before it is read otherwise.
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+file)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001183
class FTPHandler(BaseHandler):
    """Open ftp: URLs through an ftpwrapper connection."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by `req`.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they could be determined.  Raises IOError when
        no host is given or an ftplib error occurs, and URLError when the
        host name cannot be resolved.
        """
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        # The last path component is the file name; the rest are directories.
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by a leading '/'.
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            # Default transfer type: 'I' when a file name is present,
            # 'D' otherwise; a ';type=' attribute overrides it below.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            # Re-raise as IOError while keeping the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a new ftpwrapper for the given connection parameters."""
        fw = ftpwrapper(user, passwd, host, port, dirs)
##        fw.ftp.set_debuglevel(1)
        return fw
1240
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a cache for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        # key -> ftpwrapper, and key -> time after which the entry expires
        self.cache = {}
        self.timeout = {}
        # Earliest expiry time known; 0 forces a scan on the first check.
        self.soonest = 0
        # Lifetime added to time.time() whenever an entry is created or reused.
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the lifetime applied to cache entries from now on."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for these parameters, creating it if needed."""
        # Note that the password is not part of the cache key.
        key = user, host, port, '/'.join(dirs)
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Drop expired entries, then evict one entry if the cache is full."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    # NOTE(review): unlike the expiry branch above, the
                    # evicted connection is not close()d here - confirm
                    # whether that is intentional.
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001286
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) of `req` to its host.

        Raises GopherError when the request has no host.
        """
        # XXX can raise socket.error
        import gopherlib # this raises DeprecationWarning in 2.5
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        gopher_type, remainder = splitgophertype(req.get_selector())
        raw_selector, query = splitquery(remainder)
        selector = unquote(raw_selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())