blob: 8d38504d2bf491b03d71e2517809950fe8992c33 [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.

The OpenerDirector manages a collection of Handler objects that do
all the actual work.  Each Handler implements a particular protocol or
option.  The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL.  For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns.  The HTTPRedirectHandler automatically deals with
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
URLError -- a subclass of IOError; individual protocols have their own
specific subclass.

HTTPError -- also a valid HTTP response, so you can treat an HTTP error
as an exceptional event or as a valid response.
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
# XXX issues:
# If an authentication error handler tries to perform authentication
# for some reason but fails, how should the error be signalled?  The
# client needs to know the HTTP error code.  But if the handler knows
# that the problem was, e.g., that it didn't know the hash algorithm
# requested in the challenge, it would be good to pass that
# information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation
82
Georg Brandlc5ffd912006-04-02 20:48:11 +000083# Possible extensions:
84# complex proxies XXX not sure what exactly was meant by this
85# abstract factory for opener
86
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000087import base64
88import ftplib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000089import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000090import inspect
Georg Brandlbffb0bc2006-04-30 08:57:35 +000091import hashlib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000092import mimetypes
93import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000094import os
95import posixpath
96import random
97import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000099import sys
100import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000101import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000102import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000103import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104
105try:
106 from cStringIO import StringIO
107except ImportError:
108 from StringIO import StringIO
109
Georg Brandl7fff58c2006-04-02 21:13:13 +0000110from urllib import (unwrap, unquote, splittype, splithost, quote,
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000111 addinfourl, splitport, splitgophertype, splitquery,
112 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000113
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000114# support for FileHandler, proxies via environment variables
115from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000116
# Used in the User-Agent header sent with each request.  Built from
# sys.version_info rather than by slicing sys.version, so that a
# two-digit minor version such as "3.10" is not truncated to "3.1".
__version__ = '%d.%d' % sys.version_info[:2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000119
# Module-wide default opener; created lazily by urlopen() or replaced
# explicitly through install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url (a string or a Request) with the module's default opener.

    The default opener is built with build_opener() the first time it
    is needed.  data, when given, is passed on to the opener's open().
    Returns whatever the opener's open() returns.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
126
def install_opener(opener):
    """Make opener the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
130
# Do these error classes make sense?
# Make sure all of the IOError stuff is overridden; we just want to be
# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000134
class URLError(IOError):
    """Error raised by this module; carries the cause in .reason.

    URLError is a sub-type of IOError, but it doesn't share any of the
    implementation, so __init__ and __str__ are overridden.  self.args
    is set for compatibility with other EnvironmentError subclasses,
    but it holds only the reason rather than the usual (errno,
    strerror) pair.
    """

    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000147
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return.

    Attributes set by the constructor:
      code     -- the HTTP status code
      msg      -- the status message
      hdrs     -- the response headers
      fp       -- the response body file object (may be None)
      filename -- the URL that was requested
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    # URLError.__init__ is never called for an HTTPError, so without
    # this property code that catches URLError and reads .reason would
    # fail with AttributeError when the error is an HTTPError.
    @property
    def reason(self):
        """The status message, mirroring URLError.reason."""
        return self.msg
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000167
class GopherError(URLError):
    """Marker subclass of URLError; adds no behaviour of its own."""
170
Moshe Zadka8a18e992001-03-01 08:40:42 +0000171
class Request:
    """Encapsulate the state of one URL request.

    Holds the URL, optional data to send, and two header tables:
    `headers` and `unredirected_hdrs`.  Header names are normalised
    with str.capitalize() when they are stored.  The URL is split into
    type, host and selector lazily by get_type() and get_host().
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        # type and host (and the private __r_type / __r_host remainders)
        # are filled in on demand by get_type() and get_host().
        self.type = None
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        for item in headers.items():
            self.add_header(*item)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            self.origin_req_host = cookielib.request_host(self)
        else:
            self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX Fallback that guards against the get_* methods being
        # called in a non-standard order: a missing private __r_<name>
        # attribute is produced by calling get_<name>() first.  This may
        # be too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        prefix = '_Request__r_'
        if not attr.startswith(prefix):
            raise AttributeError(attr)
        getter = 'get_' + attr[len(prefix):]
        if not hasattr(Request, getter):
            raise AttributeError(attr)
        getattr(self, getter)()
        return getattr(self, attr)

    def get_method(self):
        """Return "POST" when the request carries data, else "GET"."""
        if not self.has_data():
            return "GET"
        return "POST"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Attach data to the request, replacing any previous data."""
        self.data = data

    def has_data(self):
        """Return True if data has been set (i.e. is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the request data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL given to the constructor."""
        return self.__original

    def get_type(self):
        """Return the URL type (scheme); ValueError if there is none."""
        if self.type is None:
            scheme, remainder = splittype(self.__original)
            self.type, self.__r_type = scheme, remainder
            if scheme is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            host, remainder = splithost(self.__r_type)
            if host:
                host = unquote(host)
            self.host = host
            self.__r_host = remainder
        return self.host

    def get_selector(self):
        """Return what follows the host (or the full URL after set_proxy)."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request via a proxy: the selector becomes the full URL."""
        self.host = host
        self.type = type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        """Return the origin request host stored at construction."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the unverifiable flag stored at construction."""
        return self.unverifiable

    def add_header(self, key, val):
        """Store a header; useful for something like authentication."""
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that will not be added to a redirected request."""
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if header_name is a key in either header table.

        The name is looked up as given (it is not capitalized here).
        """
        for table in (self.headers, self.unredirected_hdrs):
            if header_name in table:
                return True
        return False

    def get_header(self, header_name, default=None):
        """Return a header value; `headers` wins over `unredirected_hdrs`."""
        if header_name in self.headers:
            return self.headers[header_name]
        return self.unredirected_hdrs.get(header_name, default)

    def header_items(self):
        """Return (name, value) pairs from both tables; `headers` wins."""
        merged = dict(self.unredirected_hdrs)
        merged.update(self.headers)
        return merged.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000272
class OpenerDirector:
    """Dispatch requests to registered handlers.

    Handlers are discovered by method name in add_handler():
      <protocol>_open        -- stored in handle_open[protocol]
      <protocol>_request     -- stored in process_request[protocol]
      <protocol>_response    -- stored in process_response[protocol]
      <protocol>_error_<n>   -- stored in handle_error[protocol][n]
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every protocol/condition it implements.

        The handler is added to self.handlers, and gets add_parent(self)
        called, only if at least one of its methods was registered.
        """
        added = False
        for meth in dir(handler):
            if meth in ("redirect_request", "do_open", "proxy_open"):
                # These helper names only coincidentally look like
                # <protocol>_request / <protocol>_open; registering
                # them would create bogus "redirect", "do" and "proxy"
                # protocols whose methods have the wrong signature.
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # <protocol>_error_<kind>: kind is the text after the
                # second underscore, converted to int when numeric.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep each chain ordered by the handlers' own ordering
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] in turn.

        Handlers raise an exception if no one else should try to handle
        the request, or return None if they can't but another handler
        could.  Otherwise, they return the response.  The first non-None
        result is returned; None is returned if every handler declined.
        """
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the response.

        Runs the <protocol>_request processors, then _open(), then the
        <protocol>_response processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching <proto>_error handlers.

        For http/https the third positional argument is the status code;
        handlers named http_error_<code> are tried first, then
        http_error_default.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            lookup = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            lookup = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (lookup, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (lookup, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000402
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000403# XXX probably also want an abstract factory that knows when it makes
404# sense to skip a superclass in favor of a subclass and when it might
405# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000406
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    Each argument may be a handler instance or a handler class (which
    is instantiated with no arguments).  If any of the handlers passed
    as arguments are subclasses of the default handlers, the default
    handlers will not be used.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # A set, not a list: two user handlers may override the same default
    # class, and that default must be dropped exactly once.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)

    # install the remaining defaults first, in their declared order
    for klass in default_classes:
        if klass not in skip:
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
442
class BaseHandler:
    """Base class for handlers; orders them by `handler_order`."""

    # position in a handler chain: lower values sort first
    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler was registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Compare by handler_order; sort before objects that lack one."""
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000460
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000461
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return response unchanged on success, else dispatch an error.

        Any non-2xx response is passed to self.parent.error() and that
        call's result is returned instead.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, any "2xx" code indicates that the
        # request was successfully received, understood, and accepted,
        # so 201, 202, 204 etc. must not be treated as errors.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
476
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: raise the response as an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000480
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # be conciliant with URIs containing a space
            newurl = newurl.replace(' ', '%20')
            # The new request carries no body, so headers that describe
            # the old body must not be copied onto it.
            newheaders = dict((k, v) for k, v in req.headers.items()
                              if k.lower() not in ("content-length",
                                                   "content-type"))
            return Request(newurl,
                           headers=newheaders,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect described by a 30x response.

        Returns the response of the redirected request, or None when
        the response names no target or redirect_request() declines.
        Raises HTTPError when the target scheme is not allowed or a
        redirect loop is detected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # The target comes from the server, so for security reasons do
        # not follow redirects to protocols other than HTTP, HTTPS or
        # FTP (e.g. file://).
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed" %
                            newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000562
Georg Brandl720096a2006-04-02 20:45:34 +0000563
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, rest = splittype(proxy)
    if rest.startswith("/"):
        # URL form: an authority requires "//" right after the scheme.
        if not rest.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        path_start = rest.find("/", 2)
        if path_start == -1:
            authority = rest[2:]
        else:
            authority = rest[2:path_start]
    else:
        # Bare authority: whatever splittype took for a scheme was not one.
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    user = password = None
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
635
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per URL type.

    For every entry in the proxies mapping (URL type -> proxy URL) an
    instance attribute <type>_open is created that forwards to
    proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Set up one <type>_open hook per proxy.

        proxies maps a URL type to a proxy URL; when it is None the
        result of getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # Bind the current values as lambda defaults so each hook
            # keeps its own proxy rather than the loop's last one.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy, adding Basic proxy credentials if present.

        Returns None when the proxy uses the same URL type as the
        request, so that other handlers carry on; otherwise the request
        is re-opened through self.parent and that response is returned.
        """
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            user_pass = '%s:%s' % (unquote(user), unquote(password))
            # b64encode, unlike encodestring, never inserts line breaks;
            # a break inside the value would corrupt the header when the
            # credentials are long.
            creds = base64.b64encode(user_pass)
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type:
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000672
class HTTPPasswordMgr:
    """Keep (user, password) pairs indexed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm at one URI or a sequence of URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple([self.reduce_uri(u) for u in uri])
        self.passwd.setdefault(realm, {})[uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, passwd) stored for authuri in realm.

        Returns (None, None) when no registered URI is equal to, or a
        parent of, authuri.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlsplit(uri)
        if parts[1]:
            # URI
            return parts[1], parts[2] or '/'
        elif parts[0]:
            # host:port
            return uri, '/'
        else:
            # host
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form, i.e. (netloc, path)
        pairs as produced by reduce_uri().
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments: a plain character-wise common
        # prefix would wrongly treat '/foobar' as lying below '/foo'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000722
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000723
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the credentials stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up authuri in realm first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm: try the default realm.
            return lookup(self, None, authuri)
        return user, password
732
733
class AbstractBasicAuthHandler:
    """Shared logic for answering Basic authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries
    the credentials) and are expected to provide ``self.parent``.
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use password_mgr for lookups; default to a new HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req with Basic credentials if the challenge asks for them.

        authreq -- name of the response header holding the challenge
        host    -- authority (without userinfo) or URL with an authority,
                   used as the lookup key in the password manager
        req     -- the request that was refused
        headers -- response headers

        Returns the retried response, or None when there is no challenge,
        the challenge is not parseable or is not for the Basic scheme.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open req with the credentials stored for (realm, host).

        Returns None when no password is known, or when the request
        already carried exactly these credentials (retrying would only
        repeat the failure).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            # b64encode() yields a single line.  encodestring() breaks
            # its output every 76 characters, which would embed a newline
            # in the header for long user/password pairs.
            auth = 'Basic %s' % base64.b64encode(raw)
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
771
Georg Brandlfa42bd72006-04-30 07:06:11 +0000772
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer HTTP 401 responses using Basic authentication."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full request URL is what the password manager is asked about.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.get_full_url(), req, headers)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000781
782
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer HTTP 407 responses using Basic authentication."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed needs an authority without a userinfo
        # component.  We assume req.get_host() has none: urllib2 does not
        # (and, per RFC 3986 s. 3.2.1, should not) support requests for
        # URLs that contain userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.get_host(), req, headers)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000795
796
def randombytes(n):
    """Return n random bytes.

    os.urandom() is used when the platform provides a randomness source;
    otherwise the bytes are drawn from the random module.
    """
    # os.urandom() replaces the manual read of /dev/urandom: that read
    # opened the device in text mode and did not close the file if
    # read() failed.
    try:
        return os.urandom(n)
    except NotImplementedError:
        # No OS-level randomness source is available on this platform.
        return "".join([chr(random.randrange(0, 256)) for i in range(n)])
810
class AbstractDigestAuthHandler:
    """Shared logic for answering Digest authentication challenges.

    Subclasses supply ``auth_header`` (the request header that carries
    the credentials) and are expected to provide ``self.parent``.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use passwd as password manager; default to a new HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of challenges answered since the last reset.
        self.retried = 0
        # Counter sent as the "nc" value when qop=auth is used.
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget how many challenges have been answered so far."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the Digest challenge found in headers[auth_header].

        Raises HTTPError once more than five challenges have been
        answered without a reset, and ValueError when the challenge uses
        a scheme other than Digest.  Returns None when the header is
        missing or empty.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Re-open req with an authorization computed from challenge auth.

        Returns None when no authorization can be computed or when the
        request headers already hold the same value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hex client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                            randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the credentials part of a Digest authorization header.

        req  -- the request being retried
        chal -- challenge parameters as a dict (see parse_keqv_list)

        Returns None when the challenge lacks realm or nonce, names an
        unsupported algorithm, offers only unsupported qop values, or
        when no user is known for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        if qop is not None:
            # The server may offer several comma-separated qop values;
            # only "auth" is implemented here.
            offered = [option.strip() for option in qop.split(',')]
            if 'auth' not in offered:
                # XXX handle auth-int.
                return None
            qop = 'auth'

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # No qop offered by the server.
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for algorithm.

        H is None when the algorithm is not 'MD5' or 'SHA'; KD must not
        be called in that case.
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x).hexdigest()
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000944
Moshe Zadka8a18e992001-03-01 08:40:42 +0000945
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Answer a 401 response with Digest credentials.

        Returns whatever http_error_auth_reqed() returns and propagates
        its exceptions.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        try:
            return self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        finally:
            # Reset even when the retry limit raised; otherwise the
            # counter stays above the limit and every later 401 handled
            # by this instance fails immediately.
            self.reset_retry_count()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000961
962
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer HTTP 407 responses using Digest authentication."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Answer a 407 response with Digest credentials.

        Returns whatever http_error_auth_reqed() returns and propagates
        its exceptions.
        """
        host = req.get_host()
        try:
            return self.http_error_auth_reqed('proxy-authenticate',
                                              host, req, headers)
        finally:
            # Reset even when the retry limit raised; otherwise the
            # counter stays above the limit and every later 407 handled
            # by this instance fails immediately.
            self.reset_retry_count()
Tim Peterse1190062001-01-15 03:34:38 +0000973
Moshe Zadka8a18e992001-03-01 08:40:42 +0000974class AbstractHTTPHandler(BaseHandler):
975
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000976 def __init__(self, debuglevel=0):
977 self._debuglevel = debuglevel
978
979 def set_http_debuglevel(self, level):
980 self._debuglevel = level
981
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000982 def do_request_(self, request):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000983 host = request.get_host()
984 if not host:
985 raise URLError('no host given')
986
987 if request.has_data(): # POST
988 data = request.get_data()
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000989 if not request.has_header('Content-type'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000990 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000991 'Content-type',
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000992 'application/x-www-form-urlencoded')
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000993 if not request.has_header('Content-length'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000994 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000995 'Content-length', '%d' % len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000996
997 scheme, sel = splittype(request.get_selector())
998 sel_host, sel_path = splithost(sel)
999 if not request.has_header('Host'):
1000 request.add_unredirected_header('Host', sel_host or host)
1001 for name, value in self.parent.addheaders:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001002 name = name.capitalize()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001003 if not request.has_header(name):
1004 request.add_unredirected_header(name, value)
1005
1006 return request
1007
Moshe Zadka8a18e992001-03-01 08:40:42 +00001008 def do_open(self, http_class, req):
Jeremy Hylton023518a2003-12-17 18:52:16 +00001009 """Return an addinfourl object for the request, using http_class.
1010
1011 http_class must implement the HTTPConnection API from httplib.
1012 The addinfourl return value is a file-like object. It also
1013 has methods and attributes including:
1014 - info(): return a mimetools.Message object for the headers
1015 - geturl(): return the original request URL
1016 - code: HTTP status code
1017 """
Moshe Zadka76676802001-04-11 07:44:53 +00001018 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001019 if not host:
1020 raise URLError('no host given')
1021
Jeremy Hylton828023b2003-05-04 23:44:49 +00001022 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001023 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +00001024
Jeremy Hylton023518a2003-12-17 18:52:16 +00001025 headers = dict(req.headers)
1026 headers.update(req.unredirected_hdrs)
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +00001027 # We want to make an HTTP/1.1 request, but the addinfourl
1028 # class isn't prepared to deal with a persistent connection.
1029 # It will try to read all remaining data from the socket,
1030 # which will block while the server waits for the next request.
1031 # So make sure the connection gets closed after the (only)
1032 # request.
1033 headers["Connection"] = "close"
Jeremy Hylton828023b2003-05-04 23:44:49 +00001034 try:
Jeremy Hylton023518a2003-12-17 18:52:16 +00001035 h.request(req.get_method(), req.get_selector(), req.data, headers)
1036 r = h.getresponse()
1037 except socket.error, err: # XXX what error?
Jeremy Hylton828023b2003-05-04 23:44:49 +00001038 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001039
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001040 # Pick apart the HTTPResponse object to get the addinfourl
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001041 # object initialized properly.
1042
1043 # Wrap the HTTPResponse object in socket's file object adapter
1044 # for Windows. That adapter calls recv(), so delegate recv()
1045 # to read(). This weird wrapping allows the returned object to
1046 # have readline() and readlines() methods.
Tim Peters9ca3f852004-08-08 01:05:14 +00001047
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001048 # XXX It might be better to extract the read buffering code
1049 # out of socket._fileobject() and into a base class.
Tim Peters9ca3f852004-08-08 01:05:14 +00001050
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001051 r.recv = r.read
1052 fp = socket._fileobject(r)
Tim Peters9ca3f852004-08-08 01:05:14 +00001053
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001054 resp = addinfourl(fp, r.msg, req.get_full_url())
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001055 resp.code = r.status
1056 resp.msg = r.reason
1057 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001058
Moshe Zadka8a18e992001-03-01 08:40:42 +00001059
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs through httplib.HTTPConnection."""

    # Request pre-processing is the shared implementation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001066
# HTTPSHandler is only defined when httplib exposes the HTTPS attribute.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs through httplib.HTTPSConnection."""

        # Request pre-processing is the shared implementation.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1074
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use cookiejar for storage; default to a new cookielib.CookieJar."""
        # Test identity, not truthiness: only a missing jar is replaced.
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = cookielib.CookieJar()

    def http_request(self, request):
        """Let the jar add its cookie header to request, then pass it on."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response, then pass it on."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001091
class UnknownHandler(BaseHandler):
    """Handler that reports URL types it is asked to open as unknown."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
1096
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value enclosed in double quotes is stored without the quotes; an
    empty value ("key=") is stored as the empty string.  Raises
    ValueError for an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of indexes, so that an empty value does not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1106
def parse_http_list(s):
    """Split a comma-separated HTTP list into its stripped elements.

    Follows the list syntax of RFC 2068 Section 2: commas inside a
    double-quoted string do not separate elements, and inside such a
    string a backslash escapes the next character (the backslash itself
    is dropped).  Quotes are kept in the returned elements.  Only
    double quotes are special, single quotes are not.  An empty element
    at the very end of the input is not returned.
    """
    items = []
    chars = []      # characters of the element currently being read
    escaped = in_quotes = False

    for ch in s:
        if escaped:
            # Previous character was a backslash inside quotes.
            chars.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                chars.append(ch)
        elif ch == ',':
            items.append(''.join(chars))
            chars = []
        else:
            if ch == '"':
                in_quotes = True
            chars.append(ch)

    # append last part
    if chars:
        items.append(''.join(chars))

    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001149
class FileHandler(BaseHandler):
    """Open file: URLs, delegating to FTP when the URL names a host."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        selector = req.get_selector()
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses of this host, resolved once per class."""
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            resolved = (socket.gethostbyname('localhost'),
                        socket.gethostbyname(socket.gethostname()))
        except socket.gaierror:
            # The machine's own host name does not resolve.
            resolved = (socket.gethostbyname('localhost'),)
        FileHandler.names = resolved
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        Raises URLError when the request names a host with a port, or a
        host whose address is not one of get_names().
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        local_path = url2pathname(selector)
        info = os.stat(local_path)
        size = info.st_size
        modified = email.Utils.formatdate(info.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        headers = mimetools.Message(StringIO(header_text))

        port = None
        if host:
            host, port = splitport(host)
        if host and (port or
                     socket.gethostbyname(host) not in self.get_names()):
            raise URLError('file not on local host')
        return addinfourl(open(local_path, 'rb'),
                          headers, 'file:' + selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001191
1192class FTPHandler(BaseHandler):
1193 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001194 host = req.get_host()
1195 if not host:
1196 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001197 host, port = splitport(host)
1198 if port is None:
1199 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001200 else:
1201 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001202
1203 # username/password handling
1204 user, host = splituser(host)
1205 if user:
1206 user, passwd = splitpasswd(user)
1207 else:
1208 passwd = None
1209 host = unquote(host)
1210 user = unquote(user or '')
1211 passwd = unquote(passwd or '')
1212
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001213 try:
1214 host = socket.gethostbyname(host)
1215 except socket.error, msg:
1216 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001217 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001218 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001219 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001220 dirs, file = dirs[:-1], dirs[-1]
1221 if dirs and not dirs[0]:
1222 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001223 try:
1224 fw = self.connect_ftp(user, passwd, host, port, dirs)
1225 type = file and 'I' or 'D'
1226 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001227 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001228 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001229 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001230 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001231 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001232 headers = ""
1233 mtype = mimetypes.guess_type(req.get_full_url())[0]
1234 if mtype:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001235 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001236 if retrlen is not None and retrlen >= 0:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001237 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001238 sf = StringIO(headers)
1239 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001240 return addinfourl(fp, headers, req.get_full_url())
1241 except ftplib.all_errors, msg:
1242 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001243
1244 def connect_ftp(self, user, passwd, host, port, dirs):
1245 fw = ftpwrapper(user, passwd, host, port, dirs)
1246## fw.ftp.set_debuglevel(1)
1247 return fw
1248
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps a bounded, time-limited cache of connections."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> time at which the entry expires
        self.soonest = 0     # smallest value in self.timeout (0 if empty)
        self.delay = 60      # lifetime added to the current time on each use
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the lifetime given to a connection each time it is used."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which one connection is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for these arguments, creating it if needed."""
        key = user, host, port, '/'.join(dirs)
        if key in self.cache:
            conn = self.cache[key]
        else:
            conn = self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        # Return the local reference: check_cache() may already have
        # dropped this entry again (e.g. when max_conns is 1), so looking
        # it up in self.cache could raise KeyError.
        return conn

    def check_cache(self):
        """Close expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute self.soonest; min() of an empty table would raise ValueError."""
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001294
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Return an addinfourl for the gopher resource named by req.

        Raises GopherError when the request has no host.
        """
        # XXX can raise socket.error
        import gopherlib # this raises DeprecationWarning in 2.5
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        _gopher_type, remainder = splitgophertype(req.get_selector())
        selector, query = splitquery(remainder)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())