blob: 0ab6367fd51cc45cb782079f0a9b872740192994 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a malformed URL and can read a file: URL."""
        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows: use forward
        # slashes and drop a leading drive specifier such as "C:".
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]

        file_url = "file://%s" % fname
        f = urllib.request.urlopen(file_url)
        try:
            # Only check that reading works; the content is irrelevant.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas that are outside quotes."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000047
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000048
# Doctest holder: this function has no body besides its docstring, whose
# ">>>" examples check the case normalization of Request.headers keys.
# NOTE(review): presumably collected by a doctest run elsewhere in this
# file -- confirm.
def test_request_headers_dict():
    """
    The Request.headers dictionary is not a documented interface. It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers. It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
78
# Doctest holder: this function has no body besides its docstring, whose
# ">>>" examples exercise Request.has_header(), .header_items(),
# .add_header() and .get_header().
# NOTE(review): presumably collected by a doctest run elsewhere in this
# file -- confirm.
def test_request_headers_methods():
    """
    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility. (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
109
110
# Doctest holder for HTTPPasswordMgr lookups: the body is only ``pass`` and
# the ``self`` parameter is never used; the ">>>" examples in the docstring
# are the test.  Lines prefixed with "##" inside the docstring are disabled
# examples, kept for reference.
# NOTE(review): presumably collected by a doctest run elsewhere in this
# file -- confirm.
def test_password_manager(self):
    """
    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
## Currently, we use the highest-level path where more than one match:

## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
## ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
174
175
# Doctest holder for HTTPPasswordMgr default-port handling: the function has
# no body besides its docstring and never uses ``self``; the ">>>" examples
# are the test.
# NOTE(review): presumably collected by a doctest run elsewhere in this
# file -- confirm.
def test_password_manager_default_port(self):
    """
    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme. This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
222
class MockOpener:
    """Opener stand-in that records the arguments of open() and error()."""

    addheaders = []

    def open(self, req, data=None):
        """Store *req* and *data* on the instance; nothing is opened."""
        self.req = req
        self.data = data

    def error(self, proto, *args):
        """Store the protocol and the remaining positional arguments."""
        self.proto = proto
        self.args = args
229
class MockFile:
    """File-like stub: every method accepts its arguments and does nothing."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
234
class MockHeaders(dict):
    """dict subclass offering a getheaders() method for tests."""

    def getheaders(self, name):
        """Return all stored values as a new list; *name* is not consulted."""
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000238
class MockResponse(io.StringIO):
    """In-memory text stream carrying response metadata.

    *data* becomes the stream content; *code*, *msg*, *headers* and *url*
    are stored as attributes of the same names.
    """

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor (None by default)."""
        return self.url
247
class MockCookieJar:
    """Cookie-jar stub that only remembers the objects it was called with."""

    def add_cookie_header(self, request):
        """Record *request* as ``ach_req``."""
        self.ach_req = request

    def extract_cookies(self, response, request):
        """Record *request* as ``ec_req`` and *response* as ``ec_r``."""
        self.ec_req = request
        self.ec_r = response
253
class FakeMethod:
    """Callable that forwards its calls to a shared *handle* callback."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action = meth_name, action
        self.handle = handle

    def __call__(self, *args):
        """Call ``handle(meth_name, action, *args)`` and return its result."""
        callback = self.handle
        return callback(self.meth_name, self.action, *args)
261
class MockHandler:
    """Handler whose methods are generated from a spec and log their calls.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of *methods*.

        An entry is either a bare method name (no action) or a
        (name, action) pair.
        """
        for spec in methods:
            if isinstance(spec, str):
                # Test the type rather than the length, so a two-character
                # method name is not unpacked as a (name, action) pair.
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # The attribute is set on the class, so it is visible on every
            # instance of that class.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Log the call on ``self.parent.calls`` and carry out *action*.

        Actions: None (return None), "return self", "return response",
        "return request", "error <code>" (delegate to parent.error()) and
        "raise" (raise URLError).  Anything else raises AssertionError.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            return MockResponse(200, "OK", {}, "")
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # The code is whatever follows the last space in the action.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                # Not numeric: pass the text through unchanged.
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Raised explicitly (rather than ``assert False``) so an unknown
        # action is still reported when assertions are disabled with -O.
        raise AssertionError("unknown MockHandler action: %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        """Attach *parent* and reset its ``calls`` log to an empty list."""
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
305
def add_ordered_mock_handlers(opener, meth_spec):
    """Build MockHandlers from *meth_spec* and register them on *opener*.

    meth_spec is a list with one entry per handler; each entry is a list of
    strings and/or tuples naming the methods to define on that handler. eg:

    [["http_error", "ftp_open"], ["http_open"]]

    gives one handler with .http_error() and .ftp_open(), and a second one
    with .http_open(). Such methods only record their arguments and return
    None. A tuple in place of a string makes the method perform an action
    (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    gives a handler whose .http_error() returns None, and another whose
    .http_open() returns a Request object.

    Each handler's handler_order is raised by its position in meth_spec.
    The handlers are returned as a list, in the order they were created.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler installs its generated
        # methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass
        handler = MockHandlerSubclass(meths)
        handler.handler_order = handler.handler_order + position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
336
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    register = director.add_handler
    for instance in handler_instances:
        register(instance)
    return director
342
class MockHTTPHandler(urllib.request.BaseHandler):
    """HTTP handler stub, useful for testing redirections and auth.

    The first http_open() call reports the supplied code and headers via
    parent.error(); every later call returns a 200 OK MockResponse.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget recorded requests and start again at the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        """Record a deep copy of *req* and produce the next canned response."""
        import copy
        import email
        import http.client
        self.requests.append(copy.deepcopy(req))
        if self._count == 0:
            self._count += 1
            # Reason phrase for the configured status code.
            name = http.client.responses[self.code]
            msg = email.message_from_string(self.headers)
            return self.parent.error(
                "http", req, MockFile(), self.code, name, msg)
        else:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
368
class MockPasswordManager:
    """Password-manager stub holding a single set of credentials."""

    def add_password(self, realm, uri, user, password):
        """Remember the arguments; *uri* is stored as ``self.url``."""
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        """Record the lookup target and return the stored (user, password)."""
        self.target_realm, self.target_url = realm, authuri
        credentials = (self.user, self.password)
        return credentials
379
380
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000381class OpenerDirectorTests(unittest.TestCase):
382
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000383 def test_add_non_handler(self):
384 class NonHandler(object):
385 pass
386 self.assertRaises(TypeError,
387 OpenerDirector().add_handler, NonHandler())
388
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000389 def test_badly_named_methods(self):
390 # test work-around for three methods that accidentally follow the
391 # naming conventions for handler methods
392 # (*_open() / *_request() / *_response())
393
394 # These used to call the accidentally-named methods, causing a
395 # TypeError in real code; here, returning self from these mock
396 # methods would either cause no exception, or AttributeError.
397
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000398 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000399
400 o = OpenerDirector()
401 meth_spec = [
402 [("do_open", "return self"), ("proxy_open", "return self")],
403 [("redirect_request", "return self")],
404 ]
405 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000406 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000407 for scheme in "do", "proxy", "redirect":
408 self.assertRaises(URLError, o.open, scheme+"://example.com/")
409
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000410 def test_handled(self):
411 # handler returning non-None means no more handlers will be called
412 o = OpenerDirector()
413 meth_spec = [
414 ["http_open", "ftp_open", "http_error_302"],
415 ["ftp_open"],
416 [("http_open", "return self")],
417 [("http_open", "return self")],
418 ]
419 handlers = add_ordered_mock_handlers(o, meth_spec)
420
421 req = Request("http://example.com/")
422 r = o.open(req)
423 # Second .http_open() gets called, third doesn't, since second returned
424 # non-None. Handlers without .http_open() never get any methods called
425 # on them.
426 # In fact, second mock handler defining .http_open() returns self
427 # (instead of response), which becomes the OpenerDirector's return
428 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000429 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000430 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
431 for expected, got in zip(calls, o.calls):
432 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000433 self.assertEqual((handler, name), expected)
434 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000435
436 def test_handler_order(self):
437 o = OpenerDirector()
438 handlers = []
439 for meths, handler_order in [
440 ([("http_open", "return self")], 500),
441 (["http_open"], 0),
442 ]:
443 class MockHandlerSubclass(MockHandler): pass
444 h = MockHandlerSubclass(meths)
445 h.handler_order = handler_order
446 handlers.append(h)
447 o.add_handler(h)
448
449 r = o.open("http://example.com/")
450 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000451 self.assertEqual(o.calls[0][0], handlers[1])
452 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000453
454 def test_raise(self):
455 # raising URLError stops processing of request
456 o = OpenerDirector()
457 meth_spec = [
458 [("http_open", "raise")],
459 [("http_open", "return self")],
460 ]
461 handlers = add_ordered_mock_handlers(o, meth_spec)
462
463 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000464 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000465 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000466
467## def test_error(self):
468## # XXX this doesn't actually seem to be used in standard library,
469## # but should really be tested anyway...
470
471 def test_http_error(self):
472 # XXX http_error_default
473 # http errors are a special case
474 o = OpenerDirector()
475 meth_spec = [
476 [("http_open", "error 302")],
477 [("http_error_400", "raise"), "http_open"],
478 [("http_error_302", "return response"), "http_error_303",
479 "http_error"],
480 [("http_error_302")],
481 ]
482 handlers = add_ordered_mock_handlers(o, meth_spec)
483
484 class Unknown:
485 def __eq__(self, other): return True
486
487 req = Request("http://example.com/")
488 r = o.open(req)
489 assert len(o.calls) == 2
490 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000491 (handlers[2], "http_error_302",
492 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000493 for expected, got in zip(calls, o.calls):
494 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000495 self.assertEqual((handler, method_name), got[:2])
496 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000497
498 def test_processors(self):
499 # *_request / *_response methods get called appropriately
500 o = OpenerDirector()
501 meth_spec = [
502 [("http_request", "return request"),
503 ("http_response", "return response")],
504 [("http_request", "return request"),
505 ("http_response", "return response")],
506 ]
507 handlers = add_ordered_mock_handlers(o, meth_spec)
508
509 req = Request("http://example.com/")
510 r = o.open(req)
511 # processor methods are called on *all* handlers that define them,
512 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000513 calls = [
514 (handlers[0], "http_request"), (handlers[1], "http_request"),
515 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000516
517 for i, (handler, name, args, kwds) in enumerate(o.calls):
518 if i < 2:
519 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000520 self.assertEqual((handler, name), calls[i])
521 self.assertEqual(len(args), 1)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000522 self.assert_(isinstance(args[0], Request))
523 else:
524 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000525 self.assertEqual((handler, name), calls[i])
526 self.assertEqual(len(args), 2)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000527 self.assert_(isinstance(args[0], Request))
528 # response from opener.open is None, because there's no
529 # handler that defines http_open to handle it
530 self.assert_(args[1] is None or
531 isinstance(args[1], MockResponse))
532
533
def sanepathname2url(path):
    """Convert *path* with pathname2url(), trimming extra slashes on Windows.

    When os.name is "nt" and the converted path starts with "///", its
    first two characters are dropped.
    """
    converted = urllib.request.pathname2url(path)
    on_windows = os.name == "nt"
    if on_windows and converted.startswith("///"):
        return converted[2:]
    # XXX don't ask me about the mac...
    return converted
540
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000541class HandlerTests(unittest.TestCase):
542
543 def test_ftp(self):
544 class MockFTPWrapper:
545 def __init__(self, data): self.data = data
546 def retrfile(self, filename, filetype):
547 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000548 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000549
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000550 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000551 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000552 def connect_ftp(self, user, passwd, host, port, dirs,
553 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000554 self.user, self.passwd = user, passwd
555 self.host, self.port = host, port
556 self.dirs = dirs
557 self.ftpwrapper = MockFTPWrapper(self.data)
558 return self.ftpwrapper
559
Georg Brandlf78e02b2008-06-10 17:40:04 +0000560 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000561 data = "rheum rhaponicum"
562 h = NullFTPHandler(data)
563 o = h.parent = MockOpener()
564
565 for url, host, port, type_, dirs, filename, mimetype in [
566 ("ftp://localhost/foo/bar/baz.html",
567 "localhost", ftplib.FTP_PORT, "I",
568 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000569 ("ftp://localhost:80/foo/bar/",
570 "localhost", 80, "D",
571 ["foo", "bar"], "", None),
572 ("ftp://localhost/baz.gif;type=a",
573 "localhost", ftplib.FTP_PORT, "A",
574 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000575 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000576 req = Request(url)
577 req.timeout = None
578 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000579 # ftp authentication not yet implemented by FTPHandler
580 self.assert_(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000581 self.assertEqual(h.host, socket.gethostbyname(host))
582 self.assertEqual(h.port, port)
583 self.assertEqual(h.dirs, dirs)
584 self.assertEqual(h.ftpwrapper.filename, filename)
585 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000586 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000587 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000588 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000589
590 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000591 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000592 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000593 o = h.parent = MockOpener()
594
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000595 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000596 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000597 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000598 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000599 "file://localhost%s" % urlpath,
600 "file://%s" % urlpath,
601 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000602 ]
603 try:
604 localaddr = socket.gethostbyname(socket.gethostname())
605 except socket.gaierror:
606 localaddr = ''
607 if localaddr:
608 urls.append("file://%s%s" % (localaddr, urlpath))
609
610 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000611 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000612 try:
613 try:
614 f.write(towrite)
615 finally:
616 f.close()
617
618 r = h.file_open(Request(url))
619 try:
620 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000621 headers = r.info()
622 newurl = r.geturl()
623 finally:
624 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000625 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000626 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000627 finally:
628 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000629 self.assertEqual(data, towrite)
630 self.assertEqual(headers["Content-type"], "text/plain")
631 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000632 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000633
634 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000635 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000636 "file:///file_does_not_exist.txt",
637 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
638 os.getcwd(), TESTFN),
639 "file://somerandomhost.ontheinternet.com%s/%s" %
640 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 ]:
642 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000643 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000644 try:
645 f.write(towrite)
646 finally:
647 f.close()
648
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000649 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000650 h.file_open, Request(url))
651 finally:
652 os.remove(TESTFN)
653
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000654 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000655 o = h.parent = MockOpener()
656 # XXXX why does // mean ftp (and /// mean not ftp!), and where
657 # is file: scheme specified? I think this is really a bug, and
658 # what was intended was to distinguish between URLs like:
659 # file:/blah.txt (a file)
660 # file://localhost/blah.txt (a file)
661 # file:///blah.txt (a file)
662 # file://ftp.example.com/blah.txt (an ftp URL)
663 for url, ftp in [
664 ("file://ftp.example.com//foo.txt", True),
665 ("file://ftp.example.com///foo.txt", False),
666# XXXX bug: fails with OSError, should be URLError
667 ("file://ftp.example.com/foo.txt", False),
668 ]:
669 req = Request(url)
670 try:
671 h.file_open(req)
672 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000673 except (urllib.error.URLError, OSError):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000674 self.assert_(not ftp)
675 else:
676 self.assert_(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000677 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000678
679 def test_http(self):
Guido van Rossum700bd922007-08-27 18:10:06 +0000680 class MockHTTPResponse(io.IOBase):
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000681 def __init__(self, fp, msg, status, reason):
682 self.fp = fp
683 self.msg = msg
684 self.status = status
685 self.reason = reason
Jeremy Hylton5d9c3032004-08-07 17:40:50 +0000686 def read(self):
687 return ''
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000688 class MockHTTPClass:
689 def __init__(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000690 self.level = 0
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000691 self.req_headers = []
692 self.data = None
693 self.raise_on_endheaders = False
Georg Brandlf78e02b2008-06-10 17:40:04 +0000694 def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000695 self.host = host
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000696 self.timeout = timeout
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000697 return self
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000698 def set_debuglevel(self, level):
699 self.level = level
700 def request(self, method, url, body=None, headers={}):
701 self.method = method
702 self.selector = url
703 self.req_headers += headers.items()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000704 self.req_headers.sort()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000705 if body:
706 self.data = body
707 if self.raise_on_endheaders:
708 import socket
709 raise socket.error()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000710 def getresponse(self):
711 return MockHTTPResponse(MockFile(), {}, 200, "OK")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000712
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000713 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000714 o = h.parent = MockOpener()
715
716 url = "http://example.com/"
717 for method, data in [("GET", None), ("POST", "blah")]:
718 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000719 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000720 req.add_unredirected_header("Spam", "eggs")
721 http = MockHTTPClass()
722 r = h.do_open(http, req)
723
724 # result attributes
725 r.read; r.readline # wrapped MockFile methods
726 r.info; r.geturl # addinfourl methods
727 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
728 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000729 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000730 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000731
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000732 self.assertEqual(http.host, "example.com")
733 self.assertEqual(http.level, 0)
734 self.assertEqual(http.method, method)
735 self.assertEqual(http.selector, "/")
736 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000737 [("Connection", "close"),
738 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000739 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000740
741 # check socket.error converted to URLError
742 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000743 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000744
745 # check adding of standard headers
746 o.addheaders = [("Spam", "eggs")]
747 for data in "", None: # POST, GET
748 req = Request("http://example.com/", data)
749 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000750 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000751 if data is None: # GET
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000752 self.assert_("Content-length" not in req.unredirected_hdrs)
753 self.assert_("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000754 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000755 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
756 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000757 "application/x-www-form-urlencoded")
758 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000759 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
760 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000761
762 # don't clobber existing headers
763 req.add_unredirected_header("Content-length", "foo")
764 req.add_unredirected_header("Content-type", "bar")
765 req.add_unredirected_header("Host", "baz")
766 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000767 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000768 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
769 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000770 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
771 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000772
def test_http_doubleslash(self):
    # A redundant double slash anywhere in the URL path must not break
    # anything.  Previously, a double slash directly after the host could
    # cause incorrect parsing.
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    data = ""
    urls_under_test = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for url in urls_under_test:
        request = Request(url, data)

        # Without a proxy the Host header must name the origin server.
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # After pointing the request at a proxy, Host must still name the
        # origin server.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
799
800
def test_errors(self):
    # HTTPErrorProcessor.http_response() must return 2xx responses
    # unchanged and route every other status through the parent opener's
    # error() hook.
    h = urllib.request.HTTPErrorProcessor()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)
    # all 2xx are passed through
    for code, msg in [(200, "OK"),
                      (202, "Accepted"),
                      (206, "Partial content")]:
        r = MockResponse(code, msg, {}, "", url)
        newr = h.http_response(req, r)
        self.assertTrue(r is newr)
        self.assertFalse(hasattr(o, "proto"))  # o.error not called
    # anything else calls o.error (and MockOpener returns None, here)
    r = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertTrue(h.http_response(req, r) is None)
    self.assertEqual(o.proto, "http")  # o.error called
    self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000825
def test_cookies(self):
    # HTTPCookieProcessor must pass the very same request/response objects
    # to its cookie jar and hand them back to the caller unchanged.
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")

    # request side: the jar's recorded request and the returned request
    # are both the original object
    newreq = h.http_request(req)
    self.assertTrue(cj.ach_req is req is newreq)
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())

    # response side: same identity guarantees for request and response
    newr = h.http_response(req, r)
    self.assertTrue(cj.ec_req is req)
    self.assertTrue(cj.ec_r is r is newr)
840
def test_redirect(self):
    # Exercises HTTPRedirectHandler: ordinary 301/302/303/307 redirects
    # for GET and POST, followed by the two flavours of redirect-loop
    # detection.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            # NOTE(review): when HTTPError was raised above, the checks
            # below presumably inspect the request an earlier iteration
            # left on the mock opener -- confirm against MockOpener.
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                self.assertFalse(o.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertTrue("content-length" not in headers)
            self.assertTrue("content-type" not in headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertTrue("Spam" not in o.req.headers)
            self.assertTrue("Spam" not in o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    try:
        while True:
            redirect(h, req, "http://example.com/")
            count += 1
    except urllib.error.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    try:
        while True:
            redirect(h, req, "http://example.com/%d" % count)
            count += 1
    except urllib.error.HTTPError:
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000908
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    # Seed the jar with a cookie for www.example.com ...
    cj = CookieJar()
    interact_netscape(cj, "http://www.example.com/", "spam=eggs")
    # ... then have that site answer with a redirect to another host.
    hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
    hdeh = urllib.request.HTTPDefaultErrorHandler()
    hrh = urllib.request.HTTPRedirectHandler()
    cp = urllib.request.HTTPCookieProcessor(cj)
    o = build_test_opener(hh, hdeh, hrh, cp)
    o.open("http://www.example.com/")
    # The request recorded on the mock handler must carry no Cookie header.
    self.assertFalse(hh.req.has_header("Cookie"))
923
def test_proxy(self):
    # Opening a request through a ProxyHandler must rewrite the request's
    # host to the proxy and then dispatch to the mock http_open handler.
    director = OpenerDirector()
    proxy_map = dict(http="proxy.example.com:3128")
    director.add_handler(urllib.request.ProxyHandler(proxy_map))
    mock_handlers = add_ordered_mock_handlers(
        director,
        [
            [("http_open", "return response")]
        ])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    director.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # Only the (handler, method name) prefix of each recorded call matters.
    recorded = [call[0:2] for call in director.calls]
    self.assertEqual([(mock_handlers[0], "http_open")], recorded)
940
def test_basic_auth(self, quote_char='"'):
    # Drives the shared basic-auth scenario against a server that answers
    # 401 with a Basic challenge; quote_char selects the character wrapped
    # around the realm in that challenge.
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))

    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)

    opener = OpenerDirector()
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com/protected",
                          "http://acme.example.com/protected",
                          )
956
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic-auth scenario with the realm wrapped in apostrophes
    # instead of double quotes.
    apostrophe = "'"
    self.test_basic_auth(quote_char=apostrophe)
959
def test_proxy_basic_auth(self):
    # Drives the shared basic-auth scenario through a proxy that answers
    # 407 with a Basic challenge.
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = OpenerDirector()
    proxy_map = dict(http="proxy.example.com:3128")
    opener.add_handler(urllib.request.ProxyHandler(proxy_map))

    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128",
                          )
976
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so the
    # opener below logs the order in which the handlers are invoked.
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            super().__init__()
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            super().http_error_401(*args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            super().http_error_401(*args, **kwds)

    realm = "ACME Networks"
    challenge = 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    # Basic is registered before digest on purpose: the recorded order
    # checked at the end must not depend on registration order.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com/protected",
                          "http://acme.example.com/protected",
                          )
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    expected_order = ["digest", "basic"] * 2
    self.assertEqual(opener.recorded, expected_order)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001023
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared scenario for the basic-auth tests.
    #
    # opener           -- opener that already holds auth_handler and
    #                     http_handler
    # auth_handler     -- handler under test; must offer add_password()
    # auth_header      -- name of the credentials header expected on the
    #                     retried request
    # realm            -- realm announced by http_handler's challenge
    # http_handler     -- mock handler exposing .requests and .reset()
    # password_manager -- mock manager recording what it was given/asked
    # request_url      -- URL passed to opener.open()
    # protected_url    -- URL the password manager should be queried with
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    self.assertEqual(realm, password_manager.realm)
    self.assertEqual(request_url, password_manager.url)
    self.assertEqual(user, password_manager.user)
    self.assertEqual(password, password_manager.password)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    userpass = bytes('%s:%s' % (user, password), "ascii")
    # b64encode() emits no line breaks, so no strip() is needed.
    auth_hdr_value = 'Basic ' + base64.b64encode(userpass).decode()
    self.assertEqual(http_handler.requests[1].get_header(auth_header),
                     auth_hdr_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1059
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001060
class MiscTests(unittest.TestCase):
    # Tests for urllib.request.build_opener().

    def test_build_opener(self):
        # build_opener() must accept handler classes and instances, let
        # subclasses of default handlers replace those defaults, and keep
        # every handler when several share a base class.
        class MyHTTPHandler(urllib.request.HTTPHandler): pass
        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self): pass
        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self): pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        # Fail unless some handler of `opener` has exactly the class
        # `handler_class`.  The match is deliberately exact rather than
        # isinstance(): a subclass instance must not satisfy a check for
        # its base class.
        if not any(h.__class__ == handler_class for h in opener.handlers):
            self.fail("opener has no handler of class %r; found %r"
                      % (handler_class,
                         [h.__class__ for h in opener.handlers]))
1106
1107
def test_main(verbose=None):
    # Run the doctests of this test module and of urllib.request, then the
    # unittest-based suites.
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(TrivialTests,
                         OpenerDirectorTests,
                         HandlerTests,
                         MiscTests)


if __name__ == "__main__":
    test_main(verbose=True)