blob: 5948376f6cf96f405ed9e68b4bb4eb80d42c77ec [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""An extensible library for opening URLs using a variety of protocols
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00002
3The simplest way to use this module is to call the urlopen function,
Tim Peterse1190062001-01-15 03:34:38 +00004which accepts a string containing a URL or a Request object (described
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00005below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
Georg Brandlc5ffd912006-04-02 20:48:11 +000017urlopen(url, data=None) -- basic usage is the same as original
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000018urllib. pass the url and optionally data to post to an HTTP URL, and
Tim Peterse1190062001-01-15 03:34:38 +000019get a file-like object back. One difference is that you can also pass
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000020a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate. if one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
35Request -- an object that encapsulates the state of a request. the
state can be as simple as the URL. it can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
74# If an authentication error handler that tries to perform
Fred Draked5214b02001-11-08 17:19:29 +000075# authentication for some reason but fails, how should the error be
76# signalled? The client needs to know the HTTP error code. But if
77# the handler knows that the problem was, e.g., that it didn't know
# the hash algo that was requested in the challenge, it would be good to
79# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080# ftp errors aren't handled cleanly
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000081# check digest against correct (i.e. non-apache) implementation
82
Georg Brandlc5ffd912006-04-02 20:48:11 +000083# Possible extensions:
84# complex proxies XXX not sure what exactly was meant by this
85# abstract factory for opener
86
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000087import base64
88import ftplib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000089import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000090import inspect
Georg Brandlbffb0bc2006-04-30 08:57:35 +000091import hashlib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000092import mimetypes
93import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000094import os
95import posixpath
96import random
97import re
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000099import sys
100import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000101import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000102import bisect
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000103import cookielib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000104
105try:
106 from cStringIO import StringIO
107except ImportError:
108 from StringIO import StringIO
109
Georg Brandl7fff58c2006-04-02 21:13:13 +0000110from urllib import (unwrap, unquote, splittype, splithost, quote,
Andrew M. Kuchling33ad28b2004-08-31 11:38:12 +0000111 addinfourl, splitport, splitgophertype, splitquery,
112 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000113
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000114# support for FileHandler, proxies via environment variables
115from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000116
# Used in the User-Agent header sent by OpenerDirector.  Built from
# sys.version_info instead of slicing sys.version, because a three
# character slice truncates two-digit minor versions ("3.10" -> "3.1").
__version__ = "%d.%d" % sys.version_info[:2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000119
# Module-wide opener shared by urlopen(); replaced by install_opener().
_opener = None
def urlopen(url, data=None):
    """Open *url* through the module-wide opener and return the result.

    The shared opener is created with build_opener() the first time it
    is needed, unless one has already been installed.  ``url`` and
    ``data`` are handed unchanged to the opener's open() method.
    """
    global _opener
    opener = _opener
    if opener is None:
        # Create the default opener lazily and remember it for later calls.
        opener = _opener = build_opener()
    return opener.open(url, data)
126
def install_opener(opener):
    """Store *opener* as the module-wide opener that urlopen() will use."""
    global _opener
    _opener = opener
130
131# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000132# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000133# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000134
class URLError(IOError):
    """Error type of this module; the cause is kept in ``reason``.

    The class derives from IOError but shares none of its
    implementation, which is why __init__ and __str__ are both
    overridden.  ``args`` is filled in for compatibility with other
    EnvironmentError subclasses, although it holds the single reason
    rather than the usual (errno, strerror) pair.
    """

    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000147
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # Plain attributes describing the failed response.
        self.filename = url
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # addinfourl depends on fp being a valid file object.  An
        # HTTPError may be created without one; in that case the base
        # class is simply left uninitialised.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000167
class GopherError(URLError):
    """URLError subclass that adds no behaviour of its own."""
170
Moshe Zadka8a18e992001-03-01 08:40:42 +0000171
class Request:
    """Hold the state of one request: URL, optional data and headers.

    The URL is split lazily: get_type() and get_host() parse it on first
    use and cache the leftovers in the private ``__r_type`` and
    ``__r_host`` attributes.  __getattr__ triggers the matching getter
    when one of those private attributes is read before it exists.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # The default ``headers`` dict is only read here, never mutated.
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        """Compute a missing private ``__r_*`` attribute on demand.

        Raises AttributeError for any other name, and also when the
        matching getter exists but does not define the attribute.
        """
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                # Read the instance dict directly: another getattr() here
                # would re-enter __getattr__ and recurse without end if
                # the getter did not set the attribute.
                try:
                    return self.__dict__[attr]
                except KeyError:
                    pass
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" when the request carries data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Replace the data attached to the request."""
        self.data = data

    def has_data(self):
        """Return True when data is attached (i.e. it is not None)."""
        return self.data is not None

    def get_data(self):
        """Return the attached data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL the request was created with."""
        return self.__original

    def get_type(self):
        """Return the URL scheme, parsing the URL on first use.

        Raises ValueError when the URL has no scheme.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part, parsing it on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what remains of the URL after the host was split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Point the request at a proxy.

        The host and type are replaced by the proxy's and the selector
        becomes the full original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        """Return the origin request host given to or computed by __init__."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the ``unverifiable`` flag passed to the constructor."""
        return self.unverifiable

    def add_header(self, key, val):
        """Store a header under the capitalize()d form of *key*."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header that is kept apart from the regular ones."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is a key of either header dict.

        The name is compared as given, so it must already be in the
        capitalized form used when headers are stored.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value, preferring regular over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000272
class OpenerDirector:
    """Dispatch requests to the handlers registered with add_handler().

    Handlers are indexed by the names of their methods:
    ``<protocol>_open``, ``<protocol>_request``, ``<protocol>_response``
    and ``<protocol>_error[_<kind>]``.
    """

    # Handler methods whose names merely look like "<protocol>_<condition>"
    # hooks.  Registering them would create bogus "redirect", "do" and
    # "proxy" protocols whose hooks get called with the wrong arguments.
    _non_hook_methods = ("redirect_request", "do_open", "proxy_open")

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* under every hook its method names describe.

        The handler is added to ``self.handlers`` and given this opener
        as parent only if at least one of its methods is a hook.
        """
        added = False
        for meth in dir(handler):
            if meth in self._non_hook_methods:
                # helper method, not a protocol hook
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is what follows the
                # second underscore, as an int when it is numeric.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep each chain ordered by the handlers' own ordering
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler in ``chain[kind]`` in order.

        Returns the first result that is not None, or None if every
        handler declined or no handler is registered for *kind*.
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open *fullurl* (a URL string or a Request) and return the response.

        The request passes through the ``<protocol>_request`` processors,
        is opened by _open(), and the response then passes through the
        ``<protocol>_response`` processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains.

        The chains are tried in that order and the first true result is
        returned.
        """
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for it.

        For 'http' and 'https' the third extra argument is the status
        code: ``http_error_<code>`` handlers are tried first, then the
        ``http_error_default`` chain.  For other protocols the
        ``<proto>_error`` chain is used.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # https is not different than http
            table = self.handle_error['http']
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            table = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (table, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (table, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000402
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000403# XXX probably also want an abstract factory that knows when it makes
404# sense to skip a superclass in favor of a subclass and when it might
405# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000406
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    Each argument may be a handler instance or a handler class; classes
    are instantiated with no arguments.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)
    # A set, not a list: several arguments may derive from the same
    # default class, and removing that class twice below would raise
    # ValueError.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
442
class BaseHandler:
    """Common base for handlers: parent bookkeeping and ordering."""

    # Sort key compared in __lt__; subclasses override it.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the object this handler was registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by ``handler_order``."""
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000460
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000461
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return *response* for a 2xx status, else dispatch it as an error.

        Responses outside the 2xx range are handed to the parent's
        error() method and whatever that returns is returned instead.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # Every 2xx status is a success, not just 200 and 206; treating
        # e.g. 201 or 204 as an error would raise on successful requests.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
476
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose default error hook always raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # Raise unconditionally; the exception carries the request URL
        # together with the response pieces it was called with.
        url = req.get_full_url()
        raise HTTPError(url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000480
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # be conciliant with URIs containing a space
            newurl = newurl.replace(' ', '%20')
            # The new request carries no data, so headers describing
            # the old request body must not be copied over.
            newheaders = dict((k, v) for k, v in req.headers.items()
                              if k.lower() not in ("content-length",
                                                   "content-type"))
            return Request(newurl,
                           headers=newheaders,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when no target header is present or when
        redirect_request() declines.  Raises HTTPError when the target
        uses a scheme other than http, https or ftp, or when the
        redirection limits are exceeded.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # The target comes from the server and is untrusted: for security
        # reasons do not follow redirects to protocols other than HTTP,
        # HTTPS or FTP (e.g. file:).
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed"
                            % newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000562
Georg Brandl720096a2006-04-02 20:45:34 +0000563
564def _parse_proxy(proxy):
565 """Return (scheme, user, password, host/port) given a URL or an authority.
566
567 If a URL is supplied, it must have an authority (host:port) component.
568 According to RFC 3986, having an authority component means the URL must
569 have two slashes after the scheme:
570
571 >>> _parse_proxy('file:/ftp.example.com/')
572 Traceback (most recent call last):
573 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
574
575 The first three items of the returned tuple may be None.
576
577 Examples of authority parsing:
578
579 >>> _parse_proxy('proxy.example.com')
580 (None, None, None, 'proxy.example.com')
581 >>> _parse_proxy('proxy.example.com:3128')
582 (None, None, None, 'proxy.example.com:3128')
583
584 The authority component may optionally include userinfo (assumed to be
585 username:password):
586
587 >>> _parse_proxy('joe:password@proxy.example.com')
588 (None, 'joe', 'password', 'proxy.example.com')
589 >>> _parse_proxy('joe:password@proxy.example.com:3128')
590 (None, 'joe', 'password', 'proxy.example.com:3128')
591
592 Same examples, but with URLs instead:
593
594 >>> _parse_proxy('http://proxy.example.com/')
595 ('http', None, None, 'proxy.example.com')
596 >>> _parse_proxy('http://proxy.example.com:3128/')
597 ('http', None, None, 'proxy.example.com:3128')
598 >>> _parse_proxy('http://joe:password@proxy.example.com/')
599 ('http', 'joe', 'password', 'proxy.example.com')
600 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
601 ('http', 'joe', 'password', 'proxy.example.com:3128')
602
603 Everything after the authority is ignored:
604
605 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
606 ('ftp', 'joe', 'password', 'proxy.example.com')
607
608 Test for no trailing '/' case:
609
610 >>> _parse_proxy('http://joe:password@proxy.example.com')
611 ('http', 'joe', 'password', 'proxy.example.com')
612
613 """
Georg Brandl720096a2006-04-02 20:45:34 +0000614 scheme, r_scheme = splittype(proxy)
615 if not r_scheme.startswith("/"):
616 # authority
617 scheme = None
618 authority = proxy
619 else:
620 # URL
621 if not r_scheme.startswith("//"):
622 raise ValueError("proxy URL with no authority: %r" % proxy)
623 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
624 # and 3.3.), path is empty or starts with '/'
625 end = r_scheme.find("/", 2)
626 if end == -1:
627 end = None
628 authority = r_scheme[2:end]
629 userinfo, hostport = splituser(authority)
630 if userinfo is not None:
631 user, password = splitpasswd(userinfo)
632 else:
633 user = password = None
634 return scheme, user, password, hostport
635
class ProxyHandler(BaseHandler):
    """Send requests through the proxies configured per URL scheme."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install a ``<scheme>_open`` method for each proxy entry.

        *proxies* maps a URL scheme to a proxy URL; when it is None the
        mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Default arguments freeze the current loop values in each
            # generated method.
            opener = (lambda r, proxy=proxy_url, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Redirect *req* to *proxy*.

        Returns None when the proxy uses the request's own URL type, so
        the other handlers carry on; otherwise the request is reopened
        through the parent.
        """
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type
        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.encodestring(credentials).strip()
            req.add_header('Proxy-authorization', 'Basic ' + encoded)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type != proxy_type:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req)
        # let other handlers take care of it
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000672
class HTTPPasswordMgr:
    """Store (user, password) pairs per realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* under one or several URIs.

        uri may be a single string or a sequence of strings; each one is
        stored in the reduced form produced by reduce_uri().
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        self.passwd.setdefault(realm, {})[uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) registered for *authuri* in *realm*.

        Returns (None, None) when no registered URI is equal to or above
        *authuri*.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlsplit(uri)
        if parts[1]:
            # URI
            return parts[1], parts[2] or '/'
        elif parts[0]:
            # host:port
            return uri, '/'
        else:
            # host
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A character-wise common prefix
        # would treat "/foobar" as being below "/foo" and hand the
        # credentials for one resource to an unrelated one.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000722
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000723
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the entries stored for realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm*, then in the None realm.

        The second lookup only happens when the first one yields no user.
        """
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm; try the default realm.
            return lookup(self, None, authuri)
        return user, password
732
733
class AbstractBasicAuthHandler:
    """Mixin that answers Basic authentication challenges.

    Concrete classes provide ``auth_header`` (the request header to set)
    and ``parent`` (used to re-open the request); both are read here but
    not defined here.
    """

    # Captures the auth scheme and the quoted realm of a challenge.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use *password_mgr*, or a fresh HTTPPasswordMgr when None."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials if the challenge asks for them.

        authreq -- name of the response header carrying the challenge
        host    -- key passed to the password manager lookup
        req     -- the request that was rejected
        headers -- response headers (must support .get())

        Returns the result of the retried request, or None when there is
        no usable Basic challenge or no matching credentials.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-open *req* with the credentials registered for *realm*/*host*.

        Returns None when no password is known, or when the request
        already carried exactly these credentials (retrying would only
        repeat the failure).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            # b64encode() produces a single line.  encodestring() wraps
            # every 76 characters, which broke the header for long
            # user:password strings.
            auth = 'Basic %s' % base64.b64encode(raw).strip()
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
771
Georg Brandlfa42bd72006-04-30 07:06:11 +0000772
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by delegating to http_error_auth_reqed().

        The full request URL is used as the password lookup key.
        """
        return self.http_error_auth_reqed('www-authenticate',
                                          req.get_full_url(), req, headers)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000781
782
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses with Basic proxy credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by delegating to http_error_auth_reqed().

        The request's host is used as the password lookup key.
        """
        # http_error_auth_reqed requires that there is no userinfo component
        # in authority.  Assume there isn't one, since urllib2 does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.get_host(), req, headers)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000795
796
def randombytes(n):
    """Return n random bytes.

    The bytes come from os.urandom(), which reads the operating system's
    randomness source.  When the platform provides none (os.urandom()
    raises NotImplementedError) the random module is used instead.
    """
    try:
        return os.urandom(n)
    except NotImplementedError:
        # No OS-level source available; fall back to the random module.
        return "".join([chr(random.randrange(0, 256)) for i in range(n)])
810
class AbstractDigestAuthHandler:
    """Mixin implementing the client side of HTTP Digest authentication.

    Concrete classes provide ``auth_header`` (the request header to set)
    and ``parent`` (used to re-open the request); both are read here but
    not defined here.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        """Forget how many authentication attempts were made so far."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* with Digest credentials if the challenge asks for them.

        auth_header -- name of the response header carrying the challenge
        host        -- accepted for interface symmetry; not used here
        req         -- the request that was rejected
        headers     -- response headers (must support .get())

        Raises HTTPError once more than five attempts were made without
        an intervening reset_retry_count().  Returns None when the
        challenge is missing or is not a Digest challenge.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        """Re-open *req* with an authorization computed from challenge *auth*.

        Returns None when no authorization can be computed or when the
        request's headers already hold exactly this value.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16 character client nonce for use with *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce,
                                            time.ctime(),
                                            randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest authorization value for *req*.

        chal is the parsed challenge (a dict of its key=value pairs).

        Returns the header value without the leading "Digest ", or None
        when the challenge cannot be answered: realm or nonce missing,
        unsupported algorithm, unsupported qop, or no credentials known.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        # XXX handle auth-int.
        # Only a missing qop and qop=auth are implemented.  Decline other
        # values here instead of failing later on variables that were
        # never assigned.
        if qop is not None and qop != 'auth':
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # qop is None here
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        H hashes a string to a hex digest; KD(secret, data) is
        H("secret:data").  Returns (None, None) for an algorithm other
        than 'MD5' or 'SHA', which get_authorization() treats as
        "cannot answer this challenge".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x).hexdigest()
        # XXX MD5-sess
        else:
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000941
Moshe Zadka8a18e992001-03-01 08:40:42 +0000942
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by retrying the request with Digest credentials.

        Returns whatever http_error_auth_reqed() returns; exceptions it
        raises propagate unchanged.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        try:
            return self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        finally:
            # Reset on failure as well.  Otherwise a single "digest auth
            # failed" error leaves the counter above its limit and every
            # later 401 seen by this handler fails immediately.
            self.reset_retry_count()
Moshe Zadka8a18e992001-03-01 08:40:42 +0000958
959
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses with Digest proxy credentials."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by retrying the request with Digest credentials.

        Returns whatever http_error_auth_reqed() returns; exceptions it
        raises propagate unchanged.
        """
        host = req.get_host()
        try:
            return self.http_error_auth_reqed('proxy-authenticate',
                                              host, req, headers)
        finally:
            # Reset on failure as well.  Otherwise a single "digest auth
            # failed" error leaves the counter above its limit and every
            # later 407 seen by this handler fails immediately.
            self.reset_retry_count()
Tim Peterse1190062001-01-15 03:34:38 +0000970
Moshe Zadka8a18e992001-03-01 08:40:42 +0000971class AbstractHTTPHandler(BaseHandler):
972
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000973 def __init__(self, debuglevel=0):
974 self._debuglevel = debuglevel
975
976 def set_http_debuglevel(self, level):
977 self._debuglevel = level
978
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000979 def do_request_(self, request):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000980 host = request.get_host()
981 if not host:
982 raise URLError('no host given')
983
984 if request.has_data(): # POST
985 data = request.get_data()
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000986 if not request.has_header('Content-type'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000987 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000988 'Content-type',
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000989 'application/x-www-form-urlencoded')
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000990 if not request.has_header('Content-length'):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000991 request.add_unredirected_header(
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000992 'Content-length', '%d' % len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000993
994 scheme, sel = splittype(request.get_selector())
995 sel_host, sel_path = splithost(sel)
996 if not request.has_header('Host'):
997 request.add_unredirected_header('Host', sel_host or host)
998 for name, value in self.parent.addheaders:
Georg Brandl80bb2bb2006-03-28 19:19:56 +0000999 name = name.capitalize()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001000 if not request.has_header(name):
1001 request.add_unredirected_header(name, value)
1002
1003 return request
1004
Moshe Zadka8a18e992001-03-01 08:40:42 +00001005 def do_open(self, http_class, req):
Jeremy Hylton023518a2003-12-17 18:52:16 +00001006 """Return an addinfourl object for the request, using http_class.
1007
1008 http_class must implement the HTTPConnection API from httplib.
1009 The addinfourl return value is a file-like object. It also
1010 has methods and attributes including:
1011 - info(): return a mimetools.Message object for the headers
1012 - geturl(): return the original request URL
1013 - code: HTTP status code
1014 """
Moshe Zadka76676802001-04-11 07:44:53 +00001015 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001016 if not host:
1017 raise URLError('no host given')
1018
Jeremy Hylton828023b2003-05-04 23:44:49 +00001019 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001020 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +00001021
Jeremy Hylton023518a2003-12-17 18:52:16 +00001022 headers = dict(req.headers)
1023 headers.update(req.unredirected_hdrs)
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +00001024 # We want to make an HTTP/1.1 request, but the addinfourl
1025 # class isn't prepared to deal with a persistent connection.
1026 # It will try to read all remaining data from the socket,
1027 # which will block while the server waits for the next request.
1028 # So make sure the connection gets closed after the (only)
1029 # request.
1030 headers["Connection"] = "close"
Jeremy Hylton828023b2003-05-04 23:44:49 +00001031 try:
Jeremy Hylton023518a2003-12-17 18:52:16 +00001032 h.request(req.get_method(), req.get_selector(), req.data, headers)
1033 r = h.getresponse()
1034 except socket.error, err: # XXX what error?
Jeremy Hylton828023b2003-05-04 23:44:49 +00001035 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001036
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001037 # Pick apart the HTTPResponse object to get the addinfourl
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001038 # object initialized properly.
1039
1040 # Wrap the HTTPResponse object in socket's file object adapter
1041 # for Windows. That adapter calls recv(), so delegate recv()
1042 # to read(). This weird wrapping allows the returned object to
1043 # have readline() and readlines() methods.
Tim Peters9ca3f852004-08-08 01:05:14 +00001044
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001045 # XXX It might be better to extract the read buffering code
1046 # out of socket._fileobject() and into a base class.
Tim Peters9ca3f852004-08-08 01:05:14 +00001047
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001048 r.recv = r.read
1049 fp = socket._fileobject(r)
Tim Peters9ca3f852004-08-08 01:05:14 +00001050
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001051 resp = addinfourl(fp, r.msg, req.get_full_url())
Andrew M. Kuchlingf9ea7c02004-07-10 15:34:34 +00001052 resp.code = r.status
1053 resp.msg = r.reason
1054 return resp
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001055
Moshe Zadka8a18e992001-03-01 08:40:42 +00001056
class HTTPHandler(AbstractHTTPHandler):
    """Open requests over httplib.HTTPConnection."""

    # Request preparation is the shared AbstractHTTPHandler logic.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* and return the response object built by do_open()."""
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +00001063
# HTTPSHandler is only defined when httplib exposes an HTTPS attribute.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Open requests over httplib.HTTPSConnection."""

        # Request preparation is the shared AbstractHTTPHandler logic.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Send *req* and return the response built by do_open()."""
            connection_class = httplib.HTTPSConnection
            return self.do_open(connection_class, req)
1071
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        """Use *cookiejar*, or a new cookielib.CookieJar when None."""
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = cookielib.CookieJar()

    def http_request(self, request):
        """Let the jar add its cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # The same processing applies to https.
    https_request = http_request
    https_response = http_response
Moshe Zadka8a18e992001-03-01 08:40:42 +00001088
class UnknownHandler(BaseHandler):
    """Reject requests by raising URLError naming their URL type."""

    def unknown_open(self, req):
        """Always raise URLError mentioning the request's type."""
        raise URLError('unknown url type: %s' % req.get_type())
1093
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value enclosed in double
    quotes has the quotes removed; an empty value ("key=") maps to ''.
    Returns a dict.  An element without '=' raises ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than v[0] / v[-1]: indexing an empty value
        # raised IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1103
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Splits *s* at commas into a list of whitespace-stripped elements.
    Commas inside a double-quoted section do not split.  Inside quotes a
    backslash is dropped and the character after it is taken literally,
    so an escaped quote does not end the quoted section.  Only
    double-quotes count, not single-quotes.  An empty final element is
    not included in the result.
    """
    items = []
    current = []        # characters of the element being collected
    escaped = False     # previous character was a backslash inside quotes
    in_quotes = False

    for ch in s:
        if escaped:
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001146
class FileHandler(BaseHandler):
    """Open file: requests, locally or by handing them over as ftp."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open *req* locally, or re-dispatch it as ftp.

        A selector starting with exactly two slashes is re-opened through
        the parent with the request type changed to 'ftp'.
        """
        selector = req.get_selector()
        has_authority = selector[:2] == '//' and selector[2:3] != '/'
        if not has_authority:
            return self.open_local_file(req)
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the addresses regarded as local (cached on the class)."""
        if FileHandler.names is not None:
            return FileHandler.names
        try:
            FileHandler.names = (socket.gethostbyname('localhost'),
                                 socket.gethostbyname(socket.gethostname()))
        except socket.gaierror:
            # Fall back to 'localhost' alone.
            FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        The headers carry Content-type, Content-length and Last-modified.
        Raises URLError when the request names a host that does not
        resolve to one of get_names(), or names a port.
        """
        import email.Utils
        host = req.get_host()
        selector = req.get_selector()
        localfile = url2pathname(selector)
        stats = os.stat(localfile)
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(selector)[0]
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', stats.st_size, modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        # port is only evaluated when host is non-empty, i.e. after the
        # splitport() call above has assigned it.
        is_local = not host or \
            (not port and socket.gethostbyname(host) in self.get_names())
        if not is_local:
            raise URLError('file not on local host')
        return addinfourl(open(localfile, 'rb'),
                          headers, 'file:'+selector)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001188
1189class FTPHandler(BaseHandler):
1190 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001191 host = req.get_host()
1192 if not host:
1193 raise IOError, ('ftp error', 'no host given')
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001194 host, port = splitport(host)
1195 if port is None:
1196 port = ftplib.FTP_PORT
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001197 else:
1198 port = int(port)
Martin v. Löwisa79449e2004-02-15 21:19:18 +00001199
1200 # username/password handling
1201 user, host = splituser(host)
1202 if user:
1203 user, passwd = splitpasswd(user)
1204 else:
1205 passwd = None
1206 host = unquote(host)
1207 user = unquote(user or '')
1208 passwd = unquote(passwd or '')
1209
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001210 try:
1211 host = socket.gethostbyname(host)
1212 except socket.error, msg:
1213 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001214 path, attrs = splitattr(req.get_selector())
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001215 dirs = path.split('/')
Martin v. Löwis7db04e72004-02-15 20:51:39 +00001216 dirs = map(unquote, dirs)
Fred Drake13a2c272000-02-10 17:17:14 +00001217 dirs, file = dirs[:-1], dirs[-1]
1218 if dirs and not dirs[0]:
1219 dirs = dirs[1:]
Fred Drake13a2c272000-02-10 17:17:14 +00001220 try:
1221 fw = self.connect_ftp(user, passwd, host, port, dirs)
1222 type = file and 'I' or 'D'
1223 for attr in attrs:
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +00001224 attr, value = splitvalue(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001225 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001226 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001227 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001228 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001229 headers = ""
1230 mtype = mimetypes.guess_type(req.get_full_url())[0]
1231 if mtype:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001232 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001233 if retrlen is not None and retrlen >= 0:
Georg Brandl80bb2bb2006-03-28 19:19:56 +00001234 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001235 sf = StringIO(headers)
1236 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001237 return addinfourl(fp, headers, req.get_full_url())
1238 except ftplib.all_errors, msg:
1239 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001240
1241 def connect_ftp(self, user, passwd, host, port, dirs):
1242 fw = ftpwrapper(user, passwd, host, port, dirs)
1243## fw.ftp.set_debuglevel(1)
1244 return fw
1245
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> time at which the entry expires
        self.soonest = 0     # smallest value in self.timeout
        self.delay = 60      # added to time.time() on every use
        self.max_conns = 16  # cache size that triggers an eviction

    def setTimeout(self, t):
        """Set the delay added to the current time when an entry is used."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return the cached ftpwrapper for this login, creating it if new.

        Every call refreshes the entry's expiry time and then runs
        check_cache().
        """
        key = user, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired entries, then evict one if the cache is full."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            for key, expires in list(self.timeout.items()):
                if expires < now:
                    self.cache[key].close()
                    del self.cache[key]
                    del self.timeout[key]
        self.soonest = min(self.timeout.values())

        # then check the size
        if len(self.cache) == self.max_conns:
            # NOTE(review): the evicted wrapper is dropped without
            # close(), unlike the expiry path above -- confirm intent.
            for key, expires in list(self.timeout.items()):
                if expires == self.soonest:
                    del self.cache[key]
                    del self.timeout[key]
                    break
            self.soonest = min(self.timeout.values())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001291
class GopherHandler(BaseHandler):
    """Open gopher: requests through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) of *req* to its host.

        Returns an addinfourl with no headers.  Raises GopherError when
        the request has no host.
        """
        # XXX can raise socket.error
        import gopherlib # this raises DeprecationWarning in 2.5
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        gopher_type, rest = splitgophertype(req.get_selector())
        selector, query = splitquery(rest)
        selector = unquote(selector)
        if not query:
            fp = gopherlib.send_selector(selector, host)
        else:
            fp = gopherlib.send_query(selector, unquote(query), host)
        return addinfourl(fp, noheaders(), req.get_full_url())
1309 return addinfourl(fp, noheaders(), req.get_full_url())