blob: 35a46be553831d39f0bb7f3901ec47bfabf90bad [file] [log] [blame]
"""An extensible library for opening URLs using a variety of protocols

The simplest way to use this module is to call the urlopen function,
which accepts a string containing a URL or a Request object (described
below).  It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
7
Jeremy Hyltone1906632002-10-11 17:27:55 +00008The OpenerDirector manages a collection of Handler objects that do
Tim Peterse1190062001-01-15 03:34:38 +00009all the actual work. Each Handler implements a particular protocol or
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000010option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
Raymond Hettinger024aaa12003-04-24 15:32:12 +000014HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000016
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the URL and, optionally, data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of a URL.  Raises a URLError (subclass of
IOError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
23
build_opener -- function that creates a new OpenerDirector instance.
It installs the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
Request -- an object that encapsulates the state of a request.  The
state can be as simple as the URL.  It can also include extra HTTP
headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
Tim Peterse1190062001-01-15 03:34:38 +000045HTTPError-- also a valid HTTP response, so you can treat an HTTP error
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000046as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
Moshe Zadka8a18e992001-03-01 08:40:42 +000060proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
Tim Peterse1190062001-01-15 03:34:38 +000062# build a new opener that adds authentication and caching FTP handlers
Moshe Zadka8a18e992001-03-01 08:40:42 +000063opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000064
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
# If an authentication error handler tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000080
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000090import base64
91import ftplib
92import gopherlib
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000093import httplib
Jeremy Hylton8b78b992001-10-09 16:18:45 +000094import inspect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +000095import md5
96import mimetypes
97import mimetools
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +000098import os
99import posixpath
100import random
101import re
Jeremy Hylton6d8c1aa2001-08-27 20:16:53 +0000102import rfc822
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000103import sha
104import socket
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000105import sys
106import time
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000107import urlparse
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000108import bisect
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000109
110try:
111 from cStringIO import StringIO
112except ImportError:
113 from StringIO import StringIO
114
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000115# not sure how many of these need to be gotten rid of
116from urllib import unwrap, unquote, splittype, splithost, \
117 addinfourl, splitport, splitgophertype, splitquery, \
118 splitattr, ftpwrapper, noheaders
119
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000120# support for FileHandler, proxies via environment variables
121from urllib import localhost, url2pathname, getproxies
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000122
Jeremy Hyltonfcefd0d2003-10-21 18:07:07 +0000123__version__ = "2.1"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000124
_opener = None
def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    url and data are handed unchanged to the opener's open() method.
    The default opener is created with build_opener() the first time it
    is needed, unless install_opener() has already supplied one.
    """
    global _opener
    opener = _opener
    if opener is None:
        # lazily create and remember the default opener
        opener = _opener = build_opener()
    return opener.open(url, data)
131
def install_opener(opener):
    """Make opener the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
135
136# do these error classes make sense?
Tim Peterse1190062001-01-15 03:34:38 +0000137# make sure all of the IOError stuff is overridden. we just want to be
Fred Drakea87a5212002-08-13 13:59:55 +0000138# subtypes.
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000139
class URLError(IOError):
    """Error raised when a URL cannot be opened.

    reason -- the underlying cause; stored as self.reason and as the
    single element of self.args.
    """
    # URLError is a sub-type of IOError, but it doesn't share any of
    # the implementation.  need to override __init__ and __str__.
    # It sets self.args for compatibility with other EnvironmentError
    # subclasses, but args doesn't have the typical format with errno in
    # slot 0 and strerror in slot 1.  This may be better than nothing.
    def __init__(self, reason):
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        # Wrap reason in a 1-tuple: a bare tuple on the right of % is
        # unpacked as the argument list, so a tuple-valued reason would
        # otherwise raise TypeError instead of being displayed.
        return '<urlopen error %s>' % (self.reason,)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000152
class HTTPError(URLError, addinfourl):
    """HTTP error that doubles as a response object.

    Instances carry the status code, message, headers and body file
    object as code, msg, hdrs and fp; the URL is stored as filename.
    """
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.filename = url
        self.code, self.msg = code, msg
        self.hdrs, self.fp = hdrs, fp
        # addinfourl needs a real file object.  An HTTPError is
        # sometimes built without one; in that case the base classes
        # are simply left uninitialized.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url)

    def __str__(self):
        details = (self.code, self.msg)
        return 'HTTP Error %s: %s' % details
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000172
class GopherError(URLError):
    """URLError subclass with no behaviour of its own.

    Presumably raised by the gopher handler, which is not defined in
    this part of the file -- confirm against its caller.
    """
175
Moshe Zadka8a18e992001-03-01 08:40:42 +0000176
class Request:
    """State of one URL request: the URL, optional data and headers.

    url     -- the URL string; a '<URL:...>' wrapper is removed by unwrap()
    data    -- optional request data; when present get_method() is "POST"
    headers -- optional mapping of header names to values; each entry is
               stored through add_header(), so keys are capitalized

    The URL is split lazily: get_type() and get_host() compute and cache
    their parts on first use.
    """

    def __init__(self, url, data=None, headers=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data
        self.headers = {}
        # headers defaults to None rather than a shared {} literal
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                getattr(self, 'get_' + name)()
                # Read the instance dict directly.  Going back through
                # getattr() would re-enter __getattr__ and recurse
                # without end whenever the getter does not define the
                # attribute (e.g. _Request__r_data via get_data()).
                if attr in self.__dict__:
                    return self.__dict__[attr]
        raise AttributeError(attr)

    def get_method(self):
        """Return "POST" when the request carries data, else "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    def add_data(self, data):
        """Attach data to the request, replacing any previous value."""
        self.data = data

    def has_data(self):
        """Return true if data has been set (anything but None)."""
        return self.data is not None

    def get_data(self):
        """Return the request data, or None."""
        return self.data

    def get_full_url(self):
        """Return the unwrapped URL passed to the constructor."""
        return self.__original

    def get_type(self):
        """Return the URL scheme; raise ValueError if the URL has none."""
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host part of the URL."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return what is left of the URL after the host is split off."""
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through a proxy.

        host and type replace the request's own; the selector becomes
        the full original URL.
        """
        self.host, self.type = host, type
        self.__r_host = self.__original

    def add_header(self, key, val):
        """Store a header; the key is normalized with str.capitalize()."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header in the separate unredirected_hdrs mapping."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if header_name is stored in either header dict.

        The name is compared exactly as given.  Stored keys are
        capitalized, so pass e.g. 'Content-type', not 'Content-Type'.
        """
        return bool(header_name in self.headers or
                    header_name in self.unredirected_hdrs)
254
255
class OpenerDirector:
    """Dispatch URL requests to registered handler objects.

    Handlers are registered by method name (see add_handler):
      <protocol>_open       -- stored in handle_open[protocol]
      <protocol>_error_<k>  -- stored in handle_error[protocol][k]
      <protocol>_request    -- stored in process_request[protocol]
      <protocol>_response   -- stored in process_response[protocol]
    """

    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every method name that fits the scheme.

        The handler is added to self.handlers, and given this director
        as its parent, only if at least one of its attribute names
        matched.
        """
        added = False
        for meth in dir(handler):
            i = meth.find("_")
            if i < 0:
                # No underscore means no "<protocol>_<condition>" shape.
                # Without this check find() returns -1 and a plain
                # attribute such as "open" would be registered under
                # the bogus protocol "ope".
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # kind is whatever follows "error_"; numeric kinds
                # (HTTP status codes) are stored as ints
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition in ["response", "request"]:
                kind = protocol
                lookup = getattr(self, "process_"+condition)
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # keep each chain sorted using the handlers' ordering
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call meth_name(*args) on each handler in chain[kind] in turn.

        Return the first result that is not None, or None if every
        handler declined (or none is registered for kind).
        """
        # XXX raise an exception if no one else should try to handle
        # this url.  return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or a Request) and return the response.

        Runs the <protocol>_request processors, opens the request, then
        runs the <protocol>_response processors on the result.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific, then 'unknown' openers."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers.

        For 'http' and 'https', args[2] is used as the error kind and
        the http_error_<kind> handlers are tried, followed by the
        http_error_default handlers if none returned a true result.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            chain = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            # NOTE(review): add_handler stores handle_error as
            # {protocol: {kind: [handlers]}}, but here the outer dict is
            # passed as the chain, so _call_chain would iterate the
            # kinds rather than handlers.  Looks inconsistent -- confirm
            # the intended dispatch for non-HTTP errors.
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (chain, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (chain, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000381
Gustavo Niemeyer9556fba2003-06-07 17:53:08 +0000382# XXX probably also want an abstract factory that knows when it makes
383# sense to skip a superclass in favor of a subclass and when it might
384# make sense to include both
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000385
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    handlers may be handler instances or handler classes; classes are
    instantiated without arguments.  A default handler class is left
    out when any argument is an instance or subclass of it.  Default
    handlers are added first, then the arguments in the order given.
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    for klass in default_classes:
        # Decide per default class, stopping at the first override.
        # Collecting classes in a list and removing them afterwards
        # raised ValueError when two arguments overrode the same
        # default, because the class was queued for removal twice.
        for check in handlers:
            if inspect.isclass(check):
                overridden = issubclass(check, klass)
            else:
                overridden = isinstance(check, klass)
            if overridden:
                break
        else:
            opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
421
class BaseHandler:
    """Common base for handlers: parent link plus a sort order.

    handler_order is the value compared by __lt__; lower values sort
    first.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember parent as self.parent."""
        self.parent = parent

    def close(self):
        """Do nothing; only exists for backwards compatibility."""

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # other is a custom class unaware of handler_order: sort
        # ourselves first, which tries to preserve the old behavior of
        # inserting custom classes after the default ones.
        return True
Tim Petersf545baa2003-06-15 23:26:30 +0000439
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000440
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Return response, or the error handlers' result on failure.

        Responses with a 2xx status are returned unchanged.  Any other
        status is passed to self.parent.error() and whatever that
        returns (or raises) replaces the response.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        # Every 2xx status is a success (e.g. 201 Created, 204 No
        # Content, 206 Partial Content), not only 200.
        if not (200 <= code < 300):
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
455
class HTTPDefaultErrorHandler(BaseHandler):
    """Turn an HTTP error into a raised HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000459
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects."""
    # maximum number of redirections before assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            return Request(newurl, headers=req.headers)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the 'location' or 'uri' header.

        Returns None when neither header is present or when
        redirect_request() declines.  Raises HTTPError when the target
        scheme is not http, https or ftp, or when a redirect loop is
        detected.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # The redirect target comes from the server and is untrusted.
        # Only follow it to http, https or ftp, so that a response
        # cannot make us open e.g. a file: URL.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed"
                            % newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key (url, code) if url was previously
        # visited as a result of a redirection with that code.  The
        # code is needed in addition to the URL because visiting a URL
        # twice isn't necessarily a loop: there is more than one way
        # to redirect (301, 302, 303, 307, refresh).
        key = (newurl, code)
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if key in visited or len(visited) >= self.max_redirections:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[key] = None

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000534
class ProxyHandler(BaseHandler):
    """Send requests through the proxies configured per URL scheme.

    proxies -- mapping of scheme to proxy URL; defaults to getproxies().
    A '<scheme>_open' method is installed on the instance for every
    entry in the mapping.
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, url in proxies.items():
            # defaults bind the current url/scheme to each lambda
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=url, type=scheme, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point req at proxy and hand it on.

        Credentials embedded in the proxy URL (user:password@host) are
        sent in a Proxy-authorization header.  Returns None so that
        later handlers open the request when the proxy uses the
        request's own scheme; otherwise reopens the request through
        self.parent.  The type argument is replaced by the scheme of
        the proxy URL.
        """
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        host, XXX = splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                raw = '%s:%s' % (unquote(user), unquote(password))
                # encodestring() appends a newline and wraps long
                # output; a header value must not contain line breaks.
                user_pass = base64.encodestring(raw).replace('\n', '')
            req.add_header('Proxy-authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000571
572# feature suggested by Duncan Booth
573# XXX custom is not a good name
class CustomProxy:
    """Pair a proxy address with a rule deciding which requests use it.

    proto      -- stored as self.proto
    func       -- optional callable taking the request; a true result
                  means the request should be proxied
    proxy_addr -- value returned by get_proxy()
    """
    # either pass a function to the constructor or override handle
    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto, self.func, self.addr = proto, func, proxy_addr

    def handle(self, req):
        """Return 1 if func is set and accepts req, otherwise None."""
        predicate = self.func
        if not predicate:
            return None
        if predicate(req):
            return 1
        return None

    def get_proxy(self):
        """Return the proxy address given to the constructor."""
        return self.addr
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000587
class CustomProxyHandler(BaseHandler):
    """Choose a proxy per request from registered CustomProxy objects.

    Proxies passed to the constructor, or added later with add_proxy(),
    are grouped by their proto attribute.
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        self.proxies = {}
        # register the constructor arguments; they used to be dropped
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Reopen req through the first registered proxy that accepts it.

        Returns None when no proxy is registered for the request's
        type or none of them accepts the request.
        """
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # set_proxy() takes (host, type); proxies are grouped
                # by request type, so the type stays proto.
                # NOTE(review): assumes get_proxy() returns a host
                # usable with the request's own scheme -- confirm.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register cpo under its proto, after any already registered."""
        self.proxies.setdefault(cpo.proto, []).append(cpo)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000615
class HTTPPasswordMgr:
    """Store (user, password) pairs keyed by realm and URI.

    self.passwd maps realm -> {tuple of reduced URIs: (user, passwd)}.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm at uri.

        uri may be a single URI string or a sequence of URIs.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        uri = tuple(map(self.reduce_uri, uri))
        self.passwd.setdefault(realm, {})[uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for authuri in realm.

        Returns (None, None) when no registered URI is equal to, or a
        parent of, authuri.
        """
        domains = self.passwd.get(realm, {})
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in domains.items():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            return parts[1], parts[2] or '/'
        else:
            return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare whole path segments.  A character-wise common prefix
        # would wrongly treat '/foobar' as being below '/foo'.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix = prefix + '/'
        return test[1].startswith(prefix)
Tim Peterse1190062001-01-15 03:34:38 +0000659
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000660
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm None.

    Credentials registered with realm None are used when nothing
    matches the realm that was asked for.
    """

    def find_user_password(self, realm, authuri):
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # nothing for this realm: try the default (None) realm
            return lookup(self, None, authuri)
        return user, password
669
670
class AbstractBasicAuthHandler:
    """Shared logic for answering HTTP Basic authentication challenges.

    Subclasses supply `auth_header` (the request header that carries
    the credentials) and `parent` (the opener used to retry).
    """

    # Captures the auth scheme and the quoted realm at the start of a
    # challenge header value.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header.  should probably be a lot more careful
    # in parsing them to extract multiple alternatives

    def __init__(self, password_mgr=None):
        """Use *password_mgr*, or a fresh HTTPPasswordMgr when omitted."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Convenience alias so credentials can be added via the handler.
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with credentials if the challenge is Basic.

        *authreq* is the name of the challenge header to read from
        *headers*.  Returns the retried response, or None when the
        header is absent, unparsable or not a Basic challenge.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)
        if authreq:
            mo = AbstractBasicAuthHandler.rx.match(authreq)
            if mo:
                scheme, realm = mo.groups()
                if scheme.lower() == 'basic':
                    return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with a Basic credential header for *realm*.

        Returns None when no password is known, or when the very same
        credentials were already sent (so retrying cannot help).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw:
            raw = "%s:%s" % (user, pw)
            # encodestring() wraps its output every 76 characters; a
            # header value must stay on one line, so drop every newline
            # rather than only the trailing one.
            encoded = base64.encodestring(raw).replace('\n', '')
            auth = 'Basic %s' % encoded
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_header(self.auth_header, auth)
            return self.parent.open(req)
        else:
            return None
706
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses that carry a Basic challenge."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry using credentials looked up by the URL's netloc."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
715
716
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses that carry a Basic challenge."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry using credentials looked up by the request's host."""
        return self.http_error_auth_reqed(
            'proxy-authenticate', req.get_host(), req, headers)
725
726
def randombytes(n):
    """Return n random bytes."""
    # Use /dev/urandom if it is available.  Fall back to random module
    # if not.  It might be worthwhile to extend this function to use
    # other platform-specific mechanisms for getting random bytes.
    if os.path.exists("/dev/urandom"):
        # Binary mode: the device yields raw bytes, not text.
        f = open("/dev/urandom", "rb")
        try:
            return f.read(n)
        finally:
            # Release the descriptor even if the read fails.
            f.close()
    return "".join([chr(random.randrange(0, 256)) for i in range(n)])
740
class AbstractDigestAuthHandler:
    """Shared logic for answering HTTP Digest authentication challenges.

    Subclasses supply `auth_header` (the request header that carries
    the credentials) and `parent` (the opener used to retry).
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" is not supported; such challenges are declined.

    def __init__(self, passwd=None):
        """Use *passwd* as password manager, or a fresh HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0        # attempts made for the current request
        self.nonce_count = 0    # feeds the "nc" field of qop=auth replies

    def reset_retry_count(self):
        """Forget how many times authentication has been attempted."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* with Digest credentials.

        *auth_header* names the challenge header to read from *headers*.
        Raises HTTPError after more than five attempts and ValueError
        when the challenge uses a scheme other than Digest.  Returns
        None when the challenge header is missing.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        """Parse the challenge *auth* and re-issue *req* with a reply.

        Returns None when no reply can be built, or when the identical
        reply was already sent.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the credentials string answering challenge dict *chal*.

        Returns None when the challenge lacks realm or nonce, requests
        an unsupported qop or algorithm, or no user is known.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        # Only "no qop" and qop=auth are implemented.  Decline anything
        # else up front instead of falling through with the response
        # digest left undefined.
        if qop is not None and qop != 'auth':
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        # Hash the method that is actually sent on the request line.
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        if qop:
            base = base + ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) for *algorithm*, or (None, None) if unknown."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the entity digest; always returns None."""
        # XXX not implemented yet
        return None
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000875
Moshe Zadka8a18e992001-03-01 08:40:42 +0000876
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 401 responses that carry a Digest challenge.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry with Digest credentials, then reset the retry counter."""
        netloc = urlparse.urlparse(req.get_full_url())[1]
        response = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return response
Moshe Zadka8a18e992001-03-01 08:40:42 +0000892
893
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses that carry a Digest challenge."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry with Digest credentials, then reset the retry counter."""
        proxy_host = req.get_host()
        response = self.http_error_auth_reqed(
            'proxy-authenticate', proxy_host, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return response
Tim Peterse1190062001-01-15 03:34:38 +0000904
Moshe Zadka8a18e992001-03-01 08:40:42 +0000905class AbstractHTTPHandler(BaseHandler):
906
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000907 def __init__(self, debuglevel=0):
908 self._debuglevel = debuglevel
909
910 def set_http_debuglevel(self, level):
911 self._debuglevel = level
912
913 def do_request(self, request):
914 host = request.get_host()
915 if not host:
916 raise URLError('no host given')
917
918 if request.has_data(): # POST
919 data = request.get_data()
920 if not request.has_header('Content-type'):
921 request.add_unredirected_header(
922 'Content-type',
923 'application/x-www-form-urlencoded')
924 if not request.has_header('Content-length'):
925 request.add_unredirected_header(
926 'Content-length', '%d' % len(data))
927
928 scheme, sel = splittype(request.get_selector())
929 sel_host, sel_path = splithost(sel)
930 if not request.has_header('Host'):
931 request.add_unredirected_header('Host', sel_host or host)
932 for name, value in self.parent.addheaders:
933 name = name.capitalize()
934 if not request.has_header(name):
935 request.add_unredirected_header(name, value)
936
937 return request
938
Jeremy Hylton828023b2003-05-04 23:44:49 +0000939 # XXX Should rewrite do_open() to use the new httplib interface,
Walter Dörwaldf0dfc7a2003-10-20 14:01:56 +0000940 # would be a little simpler.
Jeremy Hylton828023b2003-05-04 23:44:49 +0000941
Moshe Zadka8a18e992001-03-01 08:40:42 +0000942 def do_open(self, http_class, req):
Moshe Zadka76676802001-04-11 07:44:53 +0000943 host = req.get_host()
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000944 if not host:
945 raise URLError('no host given')
946
Jeremy Hylton828023b2003-05-04 23:44:49 +0000947 h = http_class(host) # will parse host:port
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000948 h.set_debuglevel(self._debuglevel)
Tim Peterse1190062001-01-15 03:34:38 +0000949
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000950 h.putrequest(req.get_method(), req.get_selector())
Brett Cannondf0d87a2003-05-18 02:25:07 +0000951 for k, v in req.headers.items():
Fred Drake13a2c272000-02-10 17:17:14 +0000952 h.putheader(k, v)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000953 for k, v in req.unredirected_hdrs.items():
954 h.putheader(k, v)
Jeremy Hyltonf6b444e2003-05-05 01:47:13 +0000955 # httplib will attempt to connect() here. be prepared
956 # to convert a socket error to a URLError.
Jeremy Hylton828023b2003-05-04 23:44:49 +0000957 try:
958 h.endheaders()
959 except socket.error, err:
960 raise URLError(err)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000961 if req.has_data():
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000962 h.send(req.get_data())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000963
964 code, msg, hdrs = h.getreply()
965 fp = h.getfile()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000966 response = addinfourl(fp, hdrs, req.get_full_url())
967 # XXXX should these be methods, for uniformity with rest of interface?
968 response.code = code
969 response.msg = msg
970 return response
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +0000971
Moshe Zadka8a18e992001-03-01 08:40:42 +0000972
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs through httplib.HTTP."""

    # Request pre-processing is the shared header-defaulting step.
    http_request = AbstractHTTPHandler.do_request

    def http_open(self, req):
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000979
if hasattr(httplib, 'HTTPS'):
    # Only defined when httplib exposes an HTTPS class.
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs through httplib.HTTPS."""

        # Request pre-processing is the shared header-defaulting step.
        https_request = AbstractHTTPHandler.do_request

        def https_open(self, req):
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
Moshe Zadka8a18e992001-03-01 08:40:42 +0000987
class UnknownHandler(BaseHandler):
    """Reject any request that reaches unknown_open()."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
992
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict mapping each key to its value, with one pair of
    surrounding double quotes removed from the value when present.
    An empty value ("key=") maps to ''.  An element without '=' raises
    ValueError.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices rather than indexes, so an empty value cannot raise
        # IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1002
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma, and a backslash inside a quoted-string escapes
    the following character.

    Returns the elements with surrounding whitespace stripped; empty
    elements are dropped.  Raises ValueError("unbalanced quotes") when
    an unterminated quoted-string swallows a comma.
    """
    items = []
    start = 0        # index where the current element begins
    quote_at = -1    # index of the opening quote, -1 when outside quotes
    escaped = False  # previous character was a backslash inside quotes
    for pos, ch in enumerate(s):
        if quote_at >= 0:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                quote_at = -1
        elif ch == '"':
            quote_at = pos
        elif ch == ',':
            items.append(s[start:pos])
            # Every split must advance the element start, otherwise
            # later elements would repeat the earlier text.
            start = pos + 1
    if quote_at >= 0 and ',' in s[quote_at:]:
        raise ValueError("unbalanced quotes")
    if start < len(s):
        items.append(s[start:])
    stripped = [item.strip() for item in items]
    return [item for item in stripped if item]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001048
class FileHandler(BaseHandler):
    """Open file: URLs, handing host-qualified ones to the ftp scheme."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        selector = req.get_selector()
        names_a_host = selector[:2] == '//' and selector[2:3] != '/'
        if not names_a_host:
            return self.open_local_file(req)
        # '//host/...' is retried through the opener as an ftp request.
        req.type = 'ftp'
        return self.parent.open(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return the local addresses, resolved once and cached on the class."""
        if FileHandler.names is None:
            loopback = socket.gethostbyname('localhost')
            own_address = socket.gethostbyname(socket.gethostname())
            FileHandler.names = (loopback, own_address)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by *req*.

        Raises URLError when the URL names a host that is not this
        machine (or carries a port).
        """
        host = req.get_host()
        selector = req.get_selector()
        local_path = url2pathname(selector)
        info = os.stat(local_path)
        last_modified = rfc822.formatdate(info.st_mtime)
        content_type = mimetypes.guess_type(selector)[0] or 'text/plain'
        header_text = (
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (content_type, info.st_size, last_modified))
        headers = mimetools.Message(StringIO(header_text))
        if host:
            host, port = splitport(host)
        if not host or \
           (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(local_path, 'rb'),
                              headers, 'file:'+selector)
        raise URLError('file not on local host')
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001086
1087class FTPHandler(BaseHandler):
1088 def ftp_open(self, req):
Fred Drake13a2c272000-02-10 17:17:14 +00001089 host = req.get_host()
1090 if not host:
1091 raise IOError, ('ftp error', 'no host given')
1092 # XXX handle custom username & password
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001093 try:
1094 host = socket.gethostbyname(host)
1095 except socket.error, msg:
1096 raise URLError(msg)
Fred Drake13a2c272000-02-10 17:17:14 +00001097 host, port = splitport(host)
1098 if port is None:
1099 port = ftplib.FTP_PORT
1100 path, attrs = splitattr(req.get_selector())
1101 path = unquote(path)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001102 dirs = path.split('/')
Fred Drake13a2c272000-02-10 17:17:14 +00001103 dirs, file = dirs[:-1], dirs[-1]
1104 if dirs and not dirs[0]:
1105 dirs = dirs[1:]
1106 user = passwd = '' # XXX
1107 try:
1108 fw = self.connect_ftp(user, passwd, host, port, dirs)
1109 type = file and 'I' or 'D'
1110 for attr in attrs:
1111 attr, value = splitattr(attr)
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001112 if attr.lower() == 'type' and \
Fred Drake13a2c272000-02-10 17:17:14 +00001113 value in ('a', 'A', 'i', 'I', 'd', 'D'):
Eric S. Raymondb08b2d32001-02-09 11:10:16 +00001114 type = value.upper()
Fred Drake13a2c272000-02-10 17:17:14 +00001115 fp, retrlen = fw.retrfile(file, type)
Guido van Rossum833a8d82001-08-24 13:10:13 +00001116 headers = ""
1117 mtype = mimetypes.guess_type(req.get_full_url())[0]
1118 if mtype:
Brett Cannon783eaf42003-06-17 21:52:34 +00001119 headers += "Content-type: %s\n" % mtype
Fred Drake13a2c272000-02-10 17:17:14 +00001120 if retrlen is not None and retrlen >= 0:
Brett Cannon783eaf42003-06-17 21:52:34 +00001121 headers += "Content-length: %d\n" % retrlen
Guido van Rossum833a8d82001-08-24 13:10:13 +00001122 sf = StringIO(headers)
1123 headers = mimetools.Message(sf)
Fred Drake13a2c272000-02-10 17:17:14 +00001124 return addinfourl(fp, headers, req.get_full_url())
1125 except ftplib.all_errors, msg:
1126 raise IOError, ('ftp error', msg), sys.exc_info()[2]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001127
1128 def connect_ftp(self, user, passwd, host, port, dirs):
1129 fw = ftpwrapper(user, passwd, host, port, dirs)
1130## fw.ftp.set_debuglevel(1)
1131 return fw
1132
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps connections open for reuse."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> expiry time (seconds since epoch)
        self.soonest = 0     # earliest expiry among cached connections
        self.delay = 60      # seconds an idle connection is kept
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how many seconds an idle connection stays cached."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached connection for this target, creating one if needed."""
        # The wrapper is created for a specific directory, so the
        # directory is part of the cache identity.
        key = user, passwd, host, port, '/'.join(dirs)
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections and enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # Iterate over a snapshot: entries are deleted in the loop.
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self._update_soonest()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close before dropping the reference.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute the earliest expiry; 0 when nothing is cached."""
        if self.timeout:
            self.soonest = min(self.timeout.values())
        else:
            self.soonest = 0
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001178
class GopherHandler(BaseHandler):
    """Open gopher: URLs through gopherlib."""

    def gopher_open(self, req):
        """Send the selector (and query, if any) to the request's host.

        Raises GopherError when the request has no host.
        """
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # The gopher type is split off and not used further.
        gopher_type, remainder = splitgophertype(req.get_selector())
        remainder, query = splitquery(remainder)
        selector = unquote(remainder)
        if query:
            fp = gopherlib.send_query(selector, unquote(query), host)
        else:
            fp = gopherlib.send_selector(selector, host)
        return addinfourl(fp, noheaders(), req.get_full_url())
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001195
1196#bleck! don't use this yet
class OpenerFactory:
    """Experimental builder of OpenerDirector instances."""

    default_handlers = [UnknownHandler, HTTPHandler,
                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
                        FTPHandler, FileHandler]
    handlers = []
    replacement_handlers = []

    def add_handler(self, h):
        """Register an extra handler (class or instance) for build_opener()."""
        # Rebind instead of mutating, so the class-level list is never
        # shared between factories.
        self.handlers = self.handlers + [h]

    def replace_handler(self, h):
        """Not implemented; does nothing."""
        pass

    def build_opener(self):
        """Return an OpenerDirector with the default and added handlers.

        Handler classes are instantiated; instances are used as given.
        """
        opener = OpenerDirector()
        for ph in list(self.default_handlers) + list(self.handlers):
            if inspect.isclass(ph):
                ph = ph()
            opener.add_handler(ph)
        # Hand the configured opener back to the caller.
        return opener
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001217
1218if __name__ == "__main__":
Tim Peterse1190062001-01-15 03:34:38 +00001219 # XXX some of the test code depends on machine configurations that
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001220 # are internal to CNRI. Need to set up a public server with the
1221 # right authentication configuration for test purposes.
1222 if socket.gethostname() == 'bitdiddle':
1223 localhost = 'bitdiddle.cnri.reston.va.us'
Jeremy Hylton73574ee2000-10-12 18:54:18 +00001224 elif socket.gethostname() == 'bitdiddle.concentric.net':
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001225 localhost = 'localhost'
1226 else:
1227 localhost = None
1228 urls = [
Fred Drake13a2c272000-02-10 17:17:14 +00001229 # Thanks to Fred for finding these!
1230 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1231 'gopher://gopher.vt.edu:10010/10/33',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001232
Fred Drake13a2c272000-02-10 17:17:14 +00001233 'file:/etc/passwd',
1234 'file://nonsensename/etc/passwd',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001235 'ftp://www.python.org/pub/python/misc/sousa.au',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001236 'ftp://www.python.org/pub/tmp/blat',
Fred Drake13a2c272000-02-10 17:17:14 +00001237 'http://www.espn.com/', # redirect
1238 'http://www.python.org/Spanish/Inquistion/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001239 ('http://www.python.org/cgi-bin/faqw.py',
Fred Drake13a2c272000-02-10 17:17:14 +00001240 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1241 'http://www.python.org/',
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001242 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001243 ]
1244
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001245## if localhost is not None:
1246## urls = urls + [
1247## 'file://%s/etc/passwd' % localhost,
1248## 'http://%s/simple/' % localhost,
1249## 'http://%s/digest/' % localhost,
1250## 'http://%s/not/found.h' % localhost,
1251## ]
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001252
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001253## bauth = HTTPBasicAuthHandler()
1254## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1255## 'password')
1256## dauth = HTTPDigestAuthHandler()
1257## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1258## 'password')
Tim Peterse1190062001-01-15 03:34:38 +00001259
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001260
1261 cfh = CacheFTPHandler()
1262 cfh.setTimeout(1)
1263
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001264## # XXX try out some custom proxy objects too!
1265## def at_cnri(req):
1266## host = req.get_host()
1267## print host
1268## if host[-18:] == '.cnri.reston.va.us':
1269## return 1
1270## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1271## ph = CustomProxyHandler(p)
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001272
Jeremy Hylton8b78b992001-10-09 16:18:45 +00001273## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1274 install_opener(build_opener(cfh, GopherHandler))
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001275
1276 for url in urls:
Walter Dörwald65230a22002-06-03 15:58:32 +00001277 if isinstance(url, tuple):
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001278 url, req = url
1279 else:
1280 req = None
1281 print url
1282 try:
1283 f = urlopen(url, req)
1284 except IOError, err:
Fred Drake13a2c272000-02-10 17:17:14 +00001285 print "IOError:", err
1286 except socket.error, err:
1287 print "socket.error:", err
Jeremy Hylton6d7e47b2000-01-20 18:19:08 +00001288 else:
1289 buf = f.read()
1290 f.close()
1291 print "read %d bytes" % len(buf)
1292 print
1293 time.sleep(0.1)