blob: 8cd5ca06601e4c9c1318c1da709b7a75f6b06f31 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +00007import array
Senthil Kumaran4de00a22011-05-11 21:17:57 +08008import sys
Jeremy Hyltone3e61042001-05-09 15:50:25 +00009
Jeremy Hylton1afc1692008-06-18 20:49:58 +000010import urllib.request
# The proxy bypass method imported below has logic specific to the OSX
# proxy config data structure but is testable on all platforms.
13from urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf
guido@google.coma119df92011-03-29 11:41:02 -070014import urllib.error
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
# XXX
# Request
# CacheFTPHandler (hard to write)
# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler

class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        """A malformed URL is rejected and a local file: URL can be read."""
        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        try:
            # Only the fact that reading succeeds matters; the content is
            # not inspected.
            f.read()
        finally:
            # Close the response even if read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)


def test_request_headers_dict():
    """
    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """

def test_request_headers_methods():
    """
    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """


# This function only carries doctests.  ``self`` is a leftover parameter
# that nothing in the body uses; it defaults to None so the function can
# also be called without arguments.
def test_password_manager(self=None):
    """
    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """


# This function only carries doctests.  ``self`` is a leftover parameter
# that nothing in the body uses; it defaults to None so the function can
# also be called without arguments.
def test_password_manager_default_port(self=None):
    """
    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """

class MockOpener:
    """Stand-in opener that records the arguments of its last call.

    open() stores its arguments as .req, .data and .timeout; error() stores
    them as .proto and .args.  Both return None.
    """
    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.req, self.data, self.timeout = req, data, timeout

    def error(self, proto, *args):
        self.proto, self.args = proto, args

class MockFile:
    """File-like placeholder whose read(), readline() and close() do nothing."""

    def read(self, count=None):
        pass

    def readline(self, count=None):
        pass

    def close(self):
        pass

class MockHeaders(dict):
    """dict of headers with a getheaders() accessor."""

    def getheaders(self, name):
        # ``name`` is ignored: every stored value is returned regardless of
        # which header was asked for.
        return list(self.values())

class MockResponse(io.StringIO):
    """In-memory text response.

    The body is served through the StringIO interface; code, msg, headers
    and url are stored as attributes, and info() / geturl() return the
    headers and the url respectively.
    """

    def __init__(self, code, msg, headers, data, url=None):
        io.StringIO.__init__(self, data)
        self.code, self.msg, self.headers, self.url = code, msg, headers, url

    def info(self):
        return self.headers

    def geturl(self):
        return self.url

class MockCookieJar:
    """Cookie jar stand-in that records what it was called with.

    add_cookie_header() stores the request as .ach_req; extract_cookies()
    stores the request and response as .ec_req and .ec_r.
    """

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req, self.ec_r = request, response

class FakeMethod:
    """Callable that forwards to ``handle(meth_name, action, *args)``."""

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.handle = handle
        self.action = action

    def __call__(self, *args):
        return self.handle(self.meth_name, self.action, *args)

class MockHTTPResponse(io.IOBase):
    """Minimal HTTP response object with an empty body and no headers."""

    def __init__(self, fp, msg, status, reason):
        self.fp = fp
        self.msg = msg
        self.status = status
        self.reason = reason
        # .code is always 200, independent of the ``status`` argument.
        self.code = 200
        # Nothing in this class assigns .url; give it a default so that
        # geturl() returns None instead of raising AttributeError when no
        # caller has set it.
        self.url = None

    def read(self):
        return ''

    def info(self):
        return {}

    def geturl(self):
        return self.url


class MockHTTPClass:
    """Stand-in for an HTTP connection class that records how it was used.

    Calling an instance stores host and timeout and returns the same
    instance, so one object plays both the connection class and the
    connection.  Set .raise_on_endheaders to make request() raise
    socket.error after recording its arguments.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host = host
        self.timeout = timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            self._tunnel_headers.clear()

    def request(self, method, url, body=None, headers=None):
        self.method = method
        self.selector = url
        if headers is not None:
            self.req_headers += headers.items()
        # Headers accumulate across calls and are kept sorted.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")

class MockHandler:
    """Handler whose methods are generated from a list of specs.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per spec on this instance's class.

        A spec is either a method name (str) or a ``(name, action)`` pair.
        """
        for spec in methods:
            # Dispatch on type rather than on len(spec) == 2, so that a
            # two-character method name is not mistaken for a pair.
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # Set on the class, not the instance.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then perform ``action``."""
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # "error <code>": the text after the last space is the code;
            # it stays a string if it is not an integer.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Explicit raise so an unknown action is reported even under -O.
        raise AssertionError("unknown action: %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        # Resets the parent's call log each time a handler is attached.
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order

def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in meth_spec order.

    """
    handlers = []
    for count, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler installs its generated
        # methods on the class, so handlers must not share one.
        class MockHandlerSubclass(MockHandler):
            pass
        h = MockHandlerSubclass(meths)
        # Offset handler_order by position in meth_spec.
        h.handler_order += count
        h.add_parent(opener)
        handlers.append(h)
        opener.add_handler(h)
    return handlers

def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    opener = OpenerDirector()
    for h in handler_instances:
        opener.add_handler(h)
    return opener

class MockHTTPHandler(urllib.request.BaseHandler):
    """Handler for testing redirections and auth.

    The first http_open() call reports the supplied code and headers through
    the parent's error(); later calls return a 200 OK MockResponse.  Every
    request is deep-copied into .requests; reset() restarts the sequence.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import email
        import http.client
        self.requests.append(copy.deepcopy(req))
        if self._count == 0:
            self._count = self._count + 1
            name = http.client.responses[self.code]
            msg = email.message_from_string(self.headers)
            return self.parent.error(
                "http", req, MockFile(), self.code, name, msg)
        else:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())

class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    """HTTPS handler that routes every request through one MockHTTPClass.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of .httpconn after a request has been opened.
    """

    def __init__(self):
        urllib.request.AbstractHTTPHandler.__init__(self)
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        return self.do_open(self.httpconn, req)

class MockPasswordManager:
    """Password manager stand-in holding a single set of credentials.

    add_password() stores its arguments as .realm, .url, .user and
    .password.  find_user_password() records what was asked for in
    .target_realm and .target_url, then returns the stored user and password
    whatever the realm or URI; add_password() must have been called first.
    """

    def add_password(self, realm, uri, user, password):
        self.realm = realm
        self.url = uri
        self.user = user
        self.password = password

    def find_user_password(self, realm, authuri):
        self.target_realm = realm
        self.target_url = authuri
        return self.user, self.password


class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to its handlers."""

    def test_add_non_handler(self):
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib.error import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            # NB: no trailing comma, so this is a plain string, not a tuple.
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands in for the response
            # argument, whose value is not checked.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))


def sanepathname2url(path):
    """Return ``path`` converted by urllib.request.pathname2url().

    Raises unittest.SkipTest if ``path`` cannot be encoded as UTF-8.  On
    Windows (os.name == "nt") a leading "///" is shortened to "/".
    """
    try:
        path.encode("utf-8")
    except UnicodeEncodeError:
        raise unittest.SkipTest("path is not encodable to utf8")
    urlpath = urllib.request.pathname2url(path)
    if os.name == "nt" and urlpath.startswith("///"):
        urlpath = urlpath[2:]
        # XXX don't ask me about the mac...
    return urlpath

Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000615class HandlerTests(unittest.TestCase):
616
617 def test_ftp(self):
618 class MockFTPWrapper:
619 def __init__(self, data): self.data = data
620 def retrfile(self, filename, filetype):
621 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000622 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000624 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000625 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000626 def connect_ftp(self, user, passwd, host, port, dirs,
627 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000628 self.user, self.passwd = user, passwd
629 self.host, self.port = host, port
630 self.dirs = dirs
631 self.ftpwrapper = MockFTPWrapper(self.data)
632 return self.ftpwrapper
633
Georg Brandlf78e02b2008-06-10 17:40:04 +0000634 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000635 data = "rheum rhaponicum"
636 h = NullFTPHandler(data)
637 o = h.parent = MockOpener()
638
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000639 for url, host, port, user, passwd, type_, dirs, filename, mimetype in [
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000640 ("ftp://localhost/foo/bar/baz.html",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000641 "localhost", ftplib.FTP_PORT, "", "", "I",
642 ["foo", "bar"], "baz.html", "text/html"),
643 ("ftp://parrot@localhost/foo/bar/baz.html",
644 "localhost", ftplib.FTP_PORT, "parrot", "", "I",
645 ["foo", "bar"], "baz.html", "text/html"),
646 ("ftp://%25parrot@localhost/foo/bar/baz.html",
647 "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
648 ["foo", "bar"], "baz.html", "text/html"),
649 ("ftp://%2542parrot@localhost/foo/bar/baz.html",
650 "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000652 ("ftp://localhost:80/foo/bar/",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000653 "localhost", 80, "", "", "D",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000654 ["foo", "bar"], "", None),
655 ("ftp://localhost/baz.gif;type=a",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000656 "localhost", ftplib.FTP_PORT, "", "", "A",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000657 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000658 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000659 req = Request(url)
660 req.timeout = None
661 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000662 # ftp authentication not yet implemented by FTPHandler
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000663 self.assertEqual(h.user, user)
664 self.assertEqual(h.passwd, passwd)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000665 self.assertEqual(h.host, socket.gethostbyname(host))
666 self.assertEqual(h.port, port)
667 self.assertEqual(h.dirs, dirs)
668 self.assertEqual(h.ftpwrapper.filename, filename)
669 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000670 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000671 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000672 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000673
674 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000675 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000676 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000677 o = h.parent = MockOpener()
678
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000679 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000680 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000681 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000682 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000683 "file://localhost%s" % urlpath,
684 "file://%s" % urlpath,
685 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000686 ]
687 try:
688 localaddr = socket.gethostbyname(socket.gethostname())
689 except socket.gaierror:
690 localaddr = ''
691 if localaddr:
692 urls.append("file://%s%s" % (localaddr, urlpath))
693
694 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000695 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696 try:
697 try:
698 f.write(towrite)
699 finally:
700 f.close()
701
702 r = h.file_open(Request(url))
703 try:
704 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000705 headers = r.info()
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000706 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000707 finally:
708 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000709 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000710 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000711 finally:
712 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000713 self.assertEqual(data, towrite)
714 self.assertEqual(headers["Content-type"], "text/plain")
715 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000716 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000717 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000718
719 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000720 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000721 "file:///file_does_not_exist.txt",
722 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
723 os.getcwd(), TESTFN),
724 "file://somerandomhost.ontheinternet.com%s/%s" %
725 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000726 ]:
727 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000728 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000729 try:
730 f.write(towrite)
731 finally:
732 f.close()
733
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000734 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000735 h.file_open, Request(url))
736 finally:
737 os.remove(TESTFN)
738
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000739 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000740 o = h.parent = MockOpener()
741 # XXXX why does // mean ftp (and /// mean not ftp!), and where
742 # is file: scheme specified? I think this is really a bug, and
743 # what was intended was to distinguish between URLs like:
744 # file:/blah.txt (a file)
745 # file://localhost/blah.txt (a file)
746 # file:///blah.txt (a file)
747 # file://ftp.example.com/blah.txt (an ftp URL)
748 for url, ftp in [
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000749 ("file://ftp.example.com//foo.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000750 ("file://ftp.example.com///foo.txt", False),
751# XXXX bug: fails with OSError, should be URLError
752 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000753 ("file://somehost//foo/something.txt", False),
Senthil Kumaran2ef16322010-07-11 03:12:43 +0000754 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000755 ]:
756 req = Request(url)
757 try:
758 h.file_open(req)
759 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000760 except (urllib.error.URLError, OSError):
Florent Xicluna419e3842010-08-08 16:16:07 +0000761 self.assertFalse(ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000762 else:
Florent Xicluna419e3842010-08-08 16:16:07 +0000763 self.assertIs(o.req, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000764 self.assertEqual(req.type, "ftp")
Łukasz Langad7e81cc2011-01-09 18:18:53 +0000765 self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000766
def test_http(self):
    # Drive AbstractHTTPHandler.do_open() with a mock connection class,
    # then check how do_request_() fills in headers for each kind of
    # request body used below (bytes, generator, file object, array).
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", b"blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes (a bare attribute access raises
        # AttributeError if the response lacks the attribute)
        r.read; r.readline  # wrapped MockFile methods
        r.info; r.geturl  # addinfourl methods
        # added from MockHTTPClass.getreply(); this used to be a bare
        # tuple expression that asserted nothing
        self.assertEqual((r.code, r.msg), (200, "OK"))
        hdrs = r.info()
        hdrs.get; hdrs.__contains__  # r.info() gives dict from .getreply()
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check socket.error converted to URLError (reuses the connection
    # and request left over from the last loop iteration)
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # Check for TypeError on POST data which is str.
    req = Request("http://example.com/", "badpost")
    self.assertRaises(TypeError, h.do_request_, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in b"", None:  # POST, GET
        req = Request("http://example.com/", data)
        h.do_request_(req)
        if data is None:  # GET
            self.assertNotIn("Content-length", req.unredirected_hdrs)
            self.assertNotIn("Content-type", req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")

    # Check iterable body support
    def iterable_body():
        yield b"one"
        yield b"two"
        yield b"three"

    for headers in {}, {"Content-Length": 11}:
        req = Request("http://example.com/", iterable_body(), headers)
        if not headers:
            # Having an iterable body without a Content-Length should
            # raise an exception
            self.assertRaises(ValueError, h.do_request_, req)
        else:
            h.do_request_(req)

    # A file object.
    # Test only Content-Length attribute of request.
    # The context manager closes the buffer even if an assertion fails.
    with io.BytesIO() as file_obj:
        file_obj.write(b"Something\nSomething\nSomething\n")

        for headers in {}, {"Content-Length": 30}:
            req = Request("http://example.com/", file_obj, headers)
            if not headers:
                # Having a file object body without a Content-Length
                # should raise an exception
                self.assertRaises(ValueError, h.do_request_, req)
            else:
                newreq = h.do_request_(req)
                self.assertEqual(
                    int(newreq.get_header('Content-length')), 30)

    # array.array Iterable - Content Length is calculated
    iterable_array = array.array("I", [1, 2, 3, 4])
    # Byte size of the array; 16 wherever a C unsigned int is 4 bytes,
    # but computed so the test does not depend on that.
    array_bytes = len(iterable_array) * iterable_array.itemsize

    for headers in {}, {"Content-Length": array_bytes}:
        req = Request("http://example.com/", iterable_array, headers)
        newreq = h.do_request_(req)
        self.assertEqual(int(newreq.get_header('Content-length')),
                         array_bytes)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000874
def test_http_doubleslash(self):
    # A superfluous double slash anywhere in the URL path must not
    # break anything.  Previously, a double slash directly after the
    # host could cause incorrect parsing.
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = b""
    candidates = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for candidate in candidates:
        request = Request(candidate, body)

        # Host must be determined correctly when no proxy is involved ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and also once the request has been pointed at a proxy.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
901
def test_fixpath_in_weirdurls(self):
    # Issue4493: URLs whose path part does not start with '/' must still
    # give the right host and selector after do_request_().
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    expectations = (
        # (url, selector expected on the processed request)
        ('http://www.python.org?getspam', '/?getspam'),
        ('http://www.python.org', ''),
    )
    for url, selector in expectations:
        processed = handler.do_request_(Request(url))
        self.assertEqual(processed.host, 'www.python.org')
        self.assertEqual(processed.selector, selector)
920
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000921
def test_errors(self):
    # HTTPErrorProcessor hands 2xx responses back unchanged and routes
    # everything else through the parent opener's error().
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # all 2xx are passed through
    passed_through = (
        (200, "OK"),
        (202, "Accepted"),
        (206, "Partial content"),
    )
    for code, reason in passed_through:
        response = MockResponse(code, reason, {}, "", url)
        result = processor.http_response(req, response)
        self.assertIs(response, result)
        # o.error not called
        self.assertFalse(hasattr(opener, "proto"))

    # anything else calls o.error (and MockOpener returns None, here)
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(req, response))
    # o.error called
    self.assertEqual(opener.proto, "http")
    self.assertEqual(opener.args, (req, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000946
def test_cookies(self):
    # HTTPCookieProcessor must pass the request and the response through
    # the cookie jar unchanged.
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")
    newreq = h.http_request(req)
    self.assertIs(cj.ach_req, req)
    self.assertIs(cj.ach_req, newreq)
    # Attribute access instead of the get_origin_req_host() and
    # is_unverifiable() accessors, which were deprecated and later
    # removed from Request.
    self.assertEqual(req.origin_req_host, "example.com")
    self.assertFalse(req.unverifiable)
    newr = h.http_response(req, r)
    self.assertIs(cj.ec_req, req)
    self.assertIs(cj.ec_r, r)
    self.assertIs(r, newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000963
def test_redirect(self):
    # Covers HTTPRedirectHandler: ordinary redirects for each handled
    # status code, then the two loop-protection limits.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                # .data replaces the removed has_data() accessor
                self.assertIsNone(o.req.data)

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertNotIn("content-length", headers)
            self.assertNotIn("content-type", headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", o.req.headers)
            self.assertNotIn("Spam", o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        # only an HTTPError from the handler ends this loop
        while True:
            redirect(h, req, "http://example.com/")
            count += 1
    except urllib.error.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(h, req, "http://example.com/%d" % count)
            count += 1
    except urllib.error.HTTPError:
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001035
guido@google.coma119df92011-03-29 11:41:02 -07001036
def test_invalid_redirect(self):
    # A Location header with a file, imap or ldap scheme is expected to
    # be refused with HTTPError; http, https and ftp targets are
    # expected to be followed.
    source_url = "http://example.com/a.html"
    remainder = "example.com/b.html"
    handler = urllib.request.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()
    req = Request(source_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in ('file', 'imap', 'ldap'):
        target = scheme + '://' + remainder
        location = MockHeaders({"location": target})
        with self.assertRaises(urllib.error.HTTPError):
            handler.http_error_302(req, MockFile(), 302,
                                   "Security Loophole", location)

    for scheme in ('http', 'https', 'ftp'):
        target = scheme + '://' + remainder
        location = MockHeaders({"location": target})
        handler.http_error_302(req, MockFile(), 302, "That's fine",
                               location)
        self.assertEqual(opener.req.get_full_url(), target)
1058
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock handler answers the first request with a redirect to a
    # different host.
    redirecting = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")
    self.assertFalse(redirecting.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001073
def test_redirect_fragment(self):
    # The URL reported after a redirect must equal the Location value,
    # fragment included, without the trailing header terminator.
    redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
    expected_url = redirected_url.strip()
    opener = build_test_opener(
        MockHTTPHandler(302, 'Location: ' + redirected_url),
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
    )
    response = opener.open('http://www.example.com')
    self.assertEqual(response.geturl(), expected_url)
1082
def test_proxy(self):
    # ProxyHandler must retarget the request at the proxy before the
    # http_open handler gets to see it.
    o = OpenerDirector()
    ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
    o.add_handler(ph)
    meth_spec = [
        [("http_open", "return response")]
        ]
    handlers = add_ordered_mock_handlers(o, meth_spec)

    req = Request("http://acme.example.com/")
    # .host replaces the get_host() accessor, which was deprecated and
    # later removed from Request.
    self.assertEqual(req.host, "acme.example.com")
    o.open(req)
    self.assertEqual(req.host, "proxy.example.com:3128")

    self.assertEqual([(handlers[0], "http_open")],
                     [tup[0:2] for tup in o.calls])
1099
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001100 def test_proxy_no_proxy(self):
1101 os.environ['no_proxy'] = 'python.org'
1102 o = OpenerDirector()
1103 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1104 o.add_handler(ph)
1105 req = Request("http://www.perl.org/")
1106 self.assertEqual(req.get_host(), "www.perl.org")
1107 r = o.open(req)
1108 self.assertEqual(req.get_host(), "proxy.example.com")
1109 req = Request("http://www.python.org")
1110 self.assertEqual(req.get_host(), "www.python.org")
1111 r = o.open(req)
1112 self.assertEqual(req.get_host(), "www.python.org")
1113 del os.environ['no_proxy']
1114
Ronald Oussorene72e1612011-03-14 18:15:25 -04001115 def test_proxy_no_proxy_all(self):
1116 os.environ['no_proxy'] = '*'
1117 o = OpenerDirector()
1118 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1119 o.add_handler(ph)
1120 req = Request("http://www.python.org")
1121 self.assertEqual(req.get_host(), "www.python.org")
1122 r = o.open(req)
1123 self.assertEqual(req.get_host(), "www.python.org")
1124 del os.environ['no_proxy']
1125
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001126
def test_proxy_https(self):
    # Same as test_proxy, but for an https request and an https proxy.
    o = OpenerDirector()
    ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
    o.add_handler(ph)
    meth_spec = [
        [("https_open", "return response")]
    ]
    handlers = add_ordered_mock_handlers(o, meth_spec)

    req = Request("https://www.example.com/")
    # .host replaces the get_host() accessor, which was deprecated and
    # later removed from Request.
    self.assertEqual(req.host, "www.example.com")
    o.open(req)
    self.assertEqual(req.host, "proxy.example.com:3128")
    self.assertEqual([(handlers[0], "https_open")],
                     [tup[0:2] for tup in o.calls])
1142
def test_proxy_https_proxy_authorization(self):
    # When an https request is tunnelled through a proxy, the
    # Proxy-Authorization header must stay on the request object and
    # must not be among the headers sent over the mock connection.
    o = OpenerDirector()
    ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
    o.add_handler(ph)
    https_handler = MockHTTPSHandler()
    o.add_handler(https_handler)
    req = Request("https://www.example.com/")
    req.add_header("Proxy-Authorization", "FooBar")
    req.add_header("User-Agent", "Grail")
    # .host replaces the get_host() accessor, which was deprecated and
    # later removed from Request.
    self.assertEqual(req.host, "www.example.com")
    self.assertIsNone(req._tunnel_host)
    o.open(req)
    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    self.assertNotIn(("Proxy-Authorization", "FooBar"),
                     https_handler.httpconn.req_headers)
    self.assertIn(("User-Agent", "Grail"),
                  https_handler.httpconn.req_headers)
    self.assertIsNotNone(req._tunnel_host)
    self.assertEqual(req.host, "proxy.example.com:3128")
    self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001165
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001166 # TODO: This should be only for OSX
1167 @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001168 def test_osx_proxy_bypass(self):
1169 bypass = {
1170 'exclude_simple': False,
1171 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10',
1172 '10.0/16']
1173 }
1174 # Check hosts that should trigger the proxy bypass
1175 for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1',
1176 '10.0.0.1'):
1177 self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass),
1178 'expected bypass of %s to be True' % host)
1179 # Check hosts that should not trigger the proxy bypass
1180 for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'):
1181 self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass),
1182 'expected bypass of %s to be False' % host)
1183
1184 # Check the exclude_simple flag
1185 bypass = {'exclude_simple': True, 'exceptions': []}
1186 self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
1187
def test_basic_auth(self, quote_char='"'):
    # Basic authentication against a mock server answering 401; the
    # realm in the challenge is wrapped in quote_char.
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)

    opener = OpenerDirector()
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    url = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          url, url)
1203
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic-auth scenario with the realm in single quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1206
def test_proxy_basic_auth(self):
    # Basic authentication against a mock proxy answering 407.
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    password_manager = MockPasswordManager()
    proxy_handler = urllib.request.ProxyHandler(
        dict(http="proxy.example.com:3128"))
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)

    opener = OpenerDirector()
    for handler in (proxy_handler, auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128",
                          )
1223
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so the
    # opener below records the order in which the handlers are called.
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            super().__init__()
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            super().http_error_401(*args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            super().http_error_401(*args, **kwds)

    realm = "ACME Networks"
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)

    opener = RecordingOpenerDirector()
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001270
def test_unsupported_auth_digest_handler(self):
    # While using DigestAuthHandler: a Kerberos challenge is expected to
    # make open() raise ValueError.
    challenge = 'WWW-Authenticate: Kerberos\r\n\r\n'
    opener = OpenerDirector()
    opener.add_handler(urllib.request.HTTPDigestAuthHandler(None))
    opener.add_handler(MockHTTPHandler(401, challenge))
    with self.assertRaises(ValueError):
        opener.open("http://www.example.com")
1280
def test_unsupported_auth_basic_handler(self):
    # While using BasicAuthHandler: an NTLM challenge is expected to
    # make open() raise ValueError.
    challenge = 'WWW-Authenticate: NTLM\r\n\r\n'
    opener = OpenerDirector()
    opener.add_handler(urllib.request.HTTPBasicAuthHandler(None))
    opener.add_handler(MockHTTPHandler(401, challenge))
    with self.assertRaises(ValueError):
        opener.open("http://www.example.com")
1290
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared driver for the basic-auth tests: registers credentials,
    # opens request_url once with and once without a usable password,
    # and checks the requests recorded by the mock HTTP handler.
    import base64
    user = "wile"
    password = "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    self.assertEqual(realm, password_manager.realm)
    self.assertEqual(request_url, password_manager.url)
    self.assertEqual(user, password_manager.user)
    self.assertEqual(password, password_manager.password)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = ('%s:%s' % (user, password)).encode("ascii")
    expected_value = 'Basic ' + base64.b64encode(credentials).decode()
    retried = http_handler.requests[1]
    self.assertEqual(retried.get_header(auth_header), expected_value)
    self.assertEqual(retried.unredirected_hdrs[auth_header],
                     expected_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1327
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001328
class MiscTests(unittest.TestCase):
    """Checks on which handlers urllib.request.build_opener() installs."""

    def test_build_opener(self):
        """build_opener() accepts classes and instances and honours overrides."""
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass

        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Fail unless *opener* holds a handler whose class is exactly
        *handler_class* (subclasses do not count)."""
        self.assertTrue(any(h.__class__ == handler_class
                            for h in opener.handlers),
                        "%r not found among the opener's handlers"
                        % (handler_class,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001371
class RequestTests(unittest.TestCase):
    """Tests for the urllib.request.Request object.

    Request state is read through its public attributes (data, selector,
    type, host, origin_req_host) rather than through the old accessor
    methods (add_data, has_data, get_selector, get_type, get_host,
    get_origin_req_host), which were deprecated in Python 3.3 and removed
    in 3.4.
    """

    def setUp(self):
        # One request without a body (GET) and one with a body (POST).
        self.get = Request("http://www.python.org/~jeremy/")
        self.post = Request("http://www.python.org/~jeremy/",
                            "data",
                            headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Attaching a body turns a GET request into a POST.
        self.assertIsNone(self.get.data)
        self.assertEqual("GET", self.get.get_method())
        self.get.data = "spam"
        self.assertIsNotNone(self.get.data)
        self.assertEqual("POST", self.get.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.selector)
        req = Request("http://www.python.org/")
        self.assertEqual("/", req.selector)

    def test_get_type(self):
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        # Percent-escapes in the host part are decoded.
        req = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", req.host)

    def test_proxy(self):
        self.assertFalse(self.get.has_proxy())
        self.get.set_proxy("www.perl.org", "http")
        self.assertTrue(self.get.has_proxy())
        # The origin host is unchanged; only the connection host moves.
        self.assertEqual("www.python.org", self.get.origin_req_host)
        self.assertEqual("www.perl.org", self.get.host)

    def test_wrapped_url(self):
        req = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", req.host)

    def test_url_fragment(self):
        # The fragment is not part of the selector.
        req = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", req.selector)
        req = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", req.selector)

        # Issue 11703: geturl() omits fragment in the original URL.
        url = 'http://docs.python.org/library/urllib2.html#OK'
        req = Request(url)
        self.assertEqual(req.get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001431
def test_main(verbose=None):
    """Run this module's doctests, then its unittest classes.

    *verbose* is forwarded to support.run_doctest() for both the test
    module itself and urllib.request.
    """
    # Imported here so the module can be handed to run_doctest() as an
    # object.
    from test import test_urllib2
    support.run_doctest(test_urllib2, verbose)
    support.run_doctest(urllib.request, verbose)
    tests = (TrivialTests,
             OpenerDirectorTests,
             HandlerTests,
             MiscTests,
             RequestTests)
    support.run_unittest(*tests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001442
# Run the whole suite verbosely when the file is executed as a script.
if __name__ == "__main__":
    test_main(verbose=True)