blob: 0ff318157e4549452c62b3a693f0be40522ff18e [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +00007import array
Senthil Kumaran4de00a22011-05-11 21:17:57 +08008import sys
Jeremy Hyltone3e61042001-05-09 15:50:25 +00009
Jeremy Hylton1afc1692008-06-18 20:49:58 +000010import urllib.request
Ronald Oussorene72e1612011-03-14 18:15:25 -040011# The proxy bypass method imported below has logic specific to the OSX
12# proxy config data structure but is testable on all platforms.
13from urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf
guido@google.coma119df92011-03-29 11:41:02 -070014import urllib.error
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016# XXX
17# Request
18# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000019# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000020
class TrivialTests(unittest.TestCase):
    """Smoke tests for the public surface of the urllib package."""

    def test___all__(self):
        # Verify which names are exposed: every name a star-import yields
        # must be defined in the module it was imported from.
        for module in 'request', 'response', 'parse', 'error', 'robotparser':
            context = {}
            exec('from urllib.%s import *' % module, context)
            del context['__builtins__']
            if module == 'request' and os.name == 'nt':
                # On Windows these two are re-exported from nturl2path.
                u, p = context.pop('url2pathname'), context.pop('pathname2url')
                self.assertEqual(u.__module__, 'nturl2path')
                self.assertEqual(p.__module__, 'nturl2path')
            for k, v in context.items():
                self.assertEqual(v.__module__, 'urllib.%s' % module,
                    "%r is exposed in 'urllib.%s' but defined in %r" %
                    (k, module, v.__module__))

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the data is not needed.
            f.read()
        finally:
            # Close the handle even when read() fails.
            f.close()

    def test_parse_http_list(self):
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        # Loop names chosen so the builtin ``list`` is not shadowed.
        for header, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(header), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000066
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000067
def test_request_headers_dict():
    """
    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
97
def test_request_headers_methods():
    """
    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers
    to http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
128
129
Thomas Wouters477c8d52006-05-27 19:21:47 +0000130def test_password_manager(self):
131 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000132 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters477c8d52006-05-27 19:21:47 +0000133 >>> add = mgr.add_password
134 >>> add("Some Realm", "http://example.com/", "joe", "password")
135 >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
136 >>> add("c", "http://example.com/foo", "foo", "ni")
137 >>> add("c", "http://example.com/bar", "bar", "nini")
138 >>> add("b", "http://example.com/", "first", "blah")
139 >>> add("b", "http://example.com/", "second", "spam")
140 >>> add("a", "http://example.com", "1", "a")
141 >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
142 >>> add("Some Realm", "d.example.com", "4", "d")
143 >>> add("Some Realm", "e.example.com:3128", "5", "e")
144
145 >>> mgr.find_user_password("Some Realm", "example.com")
146 ('joe', 'password')
147 >>> mgr.find_user_password("Some Realm", "http://example.com")
148 ('joe', 'password')
149 >>> mgr.find_user_password("Some Realm", "http://example.com/")
150 ('joe', 'password')
151 >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
152 ('joe', 'password')
153 >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
154 ('joe', 'password')
155 >>> mgr.find_user_password("c", "http://example.com/foo")
156 ('foo', 'ni')
157 >>> mgr.find_user_password("c", "http://example.com/bar")
158 ('bar', 'nini')
159
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000160 Actually, this is really undefined ATM
161## Currently, we use the highest-level path where more than one match:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000162
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000163## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
164## ('joe', 'password')
Thomas Wouters477c8d52006-05-27 19:21:47 +0000165
166 Use latest add_password() in case of conflict:
167
168 >>> mgr.find_user_password("b", "http://example.com/")
169 ('second', 'spam')
170
171 No special relationship between a.example.com and example.com:
172
173 >>> mgr.find_user_password("a", "http://example.com/")
174 ('1', 'a')
175 >>> mgr.find_user_password("a", "http://a.example.com/")
176 (None, None)
177
178 Ports:
179
180 >>> mgr.find_user_password("Some Realm", "c.example.com")
181 (None, None)
182 >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
183 ('3', 'c')
184 >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
185 ('3', 'c')
186 >>> mgr.find_user_password("Some Realm", "d.example.com")
187 ('4', 'd')
188 >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
189 ('5', 'e')
190
191 """
192 pass
193
194
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000195def test_password_manager_default_port(self):
196 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000197 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000198 >>> add = mgr.add_password
199
200 The point to note here is that we can't guess the default port if there's
201 no scheme. This applies to both add_password and find_user_password.
202
203 >>> add("f", "http://g.example.com:80", "10", "j")
204 >>> add("g", "http://h.example.com", "11", "k")
205 >>> add("h", "i.example.com:80", "12", "l")
206 >>> add("i", "j.example.com", "13", "m")
207 >>> mgr.find_user_password("f", "g.example.com:100")
208 (None, None)
209 >>> mgr.find_user_password("f", "g.example.com:80")
210 ('10', 'j')
211 >>> mgr.find_user_password("f", "g.example.com")
212 (None, None)
213 >>> mgr.find_user_password("f", "http://g.example.com:100")
214 (None, None)
215 >>> mgr.find_user_password("f", "http://g.example.com:80")
216 ('10', 'j')
217 >>> mgr.find_user_password("f", "http://g.example.com")
218 ('10', 'j')
219 >>> mgr.find_user_password("g", "h.example.com")
220 ('11', 'k')
221 >>> mgr.find_user_password("g", "h.example.com:80")
222 ('11', 'k')
223 >>> mgr.find_user_password("g", "http://h.example.com:80")
224 ('11', 'k')
225 >>> mgr.find_user_password("h", "i.example.com")
226 (None, None)
227 >>> mgr.find_user_password("h", "i.example.com:80")
228 ('12', 'l')
229 >>> mgr.find_user_password("h", "http://i.example.com:80")
230 ('12', 'l')
231 >>> mgr.find_user_password("i", "j.example.com")
232 ('13', 'm')
233 >>> mgr.find_user_password("i", "j.example.com:80")
234 (None, None)
235 >>> mgr.find_user_password("i", "http://j.example.com")
236 ('13', 'm')
237 >>> mgr.find_user_password("i", "http://j.example.com:80")
238 (None, None)
239
240 """
241
class MockOpener:
    """Opener stand-in that just records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the call's arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        self.proto = proto
        self.args = args
248
class MockFile:
    """File-like stub whose operations do nothing and return None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
253
class MockHeaders(dict):
    """Dict of headers exposing a getheaders() accessor."""

    def getheaders(self, name):
        # The requested name is not consulted: all stored values come back.
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000257
class MockResponse(io.StringIO):
    """In-memory text response carrying a status, headers and a URL."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        return self.headers

    def geturl(self):
        return self.url
266
class MockCookieJar:
    """Cookie-jar stub that remembers what it was asked to process."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
272
class FakeMethod:
    """Callable that forwards to ``handle`` with its name and action first."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
280
class MockHTTPResponse(io.IOBase):
    """Minimal HTTP response object with an empty body and no headers."""

    def __init__(self, fp, msg, status, reason):
        self.fp = fp
        self.msg = msg
        self.status = status
        self.reason = reason
        self.code = 200
        # geturl() reads this attribute; give it a value up front so the
        # call does not raise AttributeError before a caller assigns one.
        self.url = None

    def read(self):
        return ''

    def info(self):
        return {}

    def geturl(self):
        return self.url
297
298
class MockHTTPClass:
    """Connection stand-in: calling the instance returns the instance."""

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Record the connection parameters and hand back this same object.
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")

    def close(self):
        pass
339
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of *methods* on this class.

        An entry is either a method name (no action) or a
        ``(name, action)`` pair.
        """
        for spec in methods:
            # Test the type, not the length: a two-character name such as
            # "ab" must not be unpacked as a (name, action) pair.
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # Set on the class, not the instance; callers create a fresh
            # subclass per handler so definitions do not leak between them.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then carry out *action*."""
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # "error 302" -> 302; a non-numeric suffix is passed on as text.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Raised explicitly so an unknown action still fails under -O.
        raise AssertionError("unknown action %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
383
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler installs its fake
        # methods on the class, so sharing one class would mix them up.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later specs get a larger handler_order value.
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
414
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
420
class MockHTTPHandler(urllib.request.BaseHandler):
    # useful for testing redirections and auth
    # sends supplied headers and code as first response
    # sends 200 OK as second response
    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget recorded requests and start again at the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        """Record a copy of *req* and answer it.

        The first call after a reset reports the configured code and headers
        through the parent's error(); later calls return a 200 MockResponse.
        """
        import copy
        import email
        import http.client
        # Deep copy, so later changes to req leave the recorded one alone.
        self.requests.append(copy.deepcopy(req))
        if self._count == 0:
            self._count += 1
            name = http.client.responses[self.code]
            msg = email.message_from_string(self.headers)
            return self.parent.error(
                "http", req, MockFile(), self.code, name, msg)
        else:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
446
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpcon

    def __init__(self):
        super().__init__()
        # One mock connection object is kept and reused for every request,
        # so a test can inspect it after opening a URL.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
457
class MockPasswordManager:
    """Password manager stub holding one credential and the last lookup."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Remember what was asked for; the stored credentials are returned
        # whatever the realm or URI.
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
468
469
class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to its handlers."""

    def test_add_non_handler(self):
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib.error import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands in for the response
            # object, whose identity this test does not check.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # A unittest assertion rather than a bare assert, so the check
        # still runs under -O and reports both values on failure.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
621
622
def sanepathname2url(path):
    """Convert *path* with pathname2url(), trimming a Windows '///' prefix.

    Raises unittest.SkipTest when *path* cannot be encoded as UTF-8.
    """
    try:
        path.encode("utf-8")
    except UnicodeEncodeError:
        raise unittest.SkipTest("path is not encodable to utf8")
    converted = urllib.request.pathname2url(path)
    # XXX don't ask me about the mac...
    if os.name != "nt" or not converted.startswith("///"):
        return converted
    # On Windows, drop two of the three leading slashes.
    return converted[2:]
633
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000634class HandlerTests(unittest.TestCase):
635
636 def test_ftp(self):
637 class MockFTPWrapper:
638 def __init__(self, data): self.data = data
639 def retrfile(self, filename, filetype):
640 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000641 return io.StringIO(self.data), len(self.data)
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +0200642 def close(self): pass
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000643
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000644 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000645 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000646 def connect_ftp(self, user, passwd, host, port, dirs,
647 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000648 self.user, self.passwd = user, passwd
649 self.host, self.port = host, port
650 self.dirs = dirs
651 self.ftpwrapper = MockFTPWrapper(self.data)
652 return self.ftpwrapper
653
Georg Brandlf78e02b2008-06-10 17:40:04 +0000654 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000655 data = "rheum rhaponicum"
656 h = NullFTPHandler(data)
657 o = h.parent = MockOpener()
658
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000659 for url, host, port, user, passwd, type_, dirs, filename, mimetype in [
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000660 ("ftp://localhost/foo/bar/baz.html",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000661 "localhost", ftplib.FTP_PORT, "", "", "I",
662 ["foo", "bar"], "baz.html", "text/html"),
663 ("ftp://parrot@localhost/foo/bar/baz.html",
664 "localhost", ftplib.FTP_PORT, "parrot", "", "I",
665 ["foo", "bar"], "baz.html", "text/html"),
666 ("ftp://%25parrot@localhost/foo/bar/baz.html",
667 "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
668 ["foo", "bar"], "baz.html", "text/html"),
669 ("ftp://%2542parrot@localhost/foo/bar/baz.html",
670 "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000671 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000672 ("ftp://localhost:80/foo/bar/",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000673 "localhost", 80, "", "", "D",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000674 ["foo", "bar"], "", None),
675 ("ftp://localhost/baz.gif;type=a",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000676 "localhost", ftplib.FTP_PORT, "", "", "A",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000677 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000678 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000679 req = Request(url)
680 req.timeout = None
681 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000682 # ftp authentication not yet implemented by FTPHandler
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000683 self.assertEqual(h.user, user)
684 self.assertEqual(h.passwd, passwd)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000685 self.assertEqual(h.host, socket.gethostbyname(host))
686 self.assertEqual(h.port, port)
687 self.assertEqual(h.dirs, dirs)
688 self.assertEqual(h.ftpwrapper.filename, filename)
689 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000690 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000691 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000692 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000693
694 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000695 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000696 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000697 o = h.parent = MockOpener()
698
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000699 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000700 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000701 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000702 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000703 "file://localhost%s" % urlpath,
704 "file://%s" % urlpath,
705 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000706 ]
707 try:
708 localaddr = socket.gethostbyname(socket.gethostname())
709 except socket.gaierror:
710 localaddr = ''
711 if localaddr:
712 urls.append("file://%s%s" % (localaddr, urlpath))
713
714 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000715 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000716 try:
717 try:
718 f.write(towrite)
719 finally:
720 f.close()
721
722 r = h.file_open(Request(url))
723 try:
724 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000725 headers = r.info()
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000726 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000727 finally:
728 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000729 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000730 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000731 finally:
732 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000733 self.assertEqual(data, towrite)
734 self.assertEqual(headers["Content-type"], "text/plain")
735 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000736 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000737 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000738
739 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000740 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000741 "file:///file_does_not_exist.txt",
742 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
743 os.getcwd(), TESTFN),
744 "file://somerandomhost.ontheinternet.com%s/%s" %
745 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000746 ]:
747 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000748 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000749 try:
750 f.write(towrite)
751 finally:
752 f.close()
753
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000754 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000755 h.file_open, Request(url))
756 finally:
757 os.remove(TESTFN)
758
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000759 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000760 o = h.parent = MockOpener()
761 # XXXX why does // mean ftp (and /// mean not ftp!), and where
762 # is file: scheme specified? I think this is really a bug, and
763 # what was intended was to distinguish between URLs like:
764 # file:/blah.txt (a file)
765 # file://localhost/blah.txt (a file)
766 # file:///blah.txt (a file)
767 # file://ftp.example.com/blah.txt (an ftp URL)
768 for url, ftp in [
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000769 ("file://ftp.example.com//foo.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000770 ("file://ftp.example.com///foo.txt", False),
771# XXXX bug: fails with OSError, should be URLError
772 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000773 ("file://somehost//foo/something.txt", False),
Senthil Kumaran2ef16322010-07-11 03:12:43 +0000774 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000775 ]:
776 req = Request(url)
777 try:
778 h.file_open(req)
779 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000780 except (urllib.error.URLError, OSError):
Florent Xicluna419e3842010-08-08 16:16:07 +0000781 self.assertFalse(ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000782 else:
Florent Xicluna419e3842010-08-08 16:16:07 +0000783 self.assertIs(o.req, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000784 self.assertEqual(req.type, "ftp")
Łukasz Langad7e81cc2011-01-09 18:18:53 +0000785 self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000786
787 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000789 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000790 o = h.parent = MockOpener()
791
792 url = "http://example.com/"
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000793 for method, data in [("GET", None), ("POST", b"blah")]:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000794 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000795 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000796 req.add_unredirected_header("Spam", "eggs")
797 http = MockHTTPClass()
798 r = h.do_open(http, req)
799
800 # result attributes
801 r.read; r.readline # wrapped MockFile methods
802 r.info; r.geturl # addinfourl methods
803 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
804 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000805 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000806 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000807
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000808 self.assertEqual(http.host, "example.com")
809 self.assertEqual(http.level, 0)
810 self.assertEqual(http.method, method)
811 self.assertEqual(http.selector, "/")
812 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000813 [("Connection", "close"),
814 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000815 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000816
817 # check socket.error converted to URLError
818 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000819 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000820
Senthil Kumaran29333122011-02-11 11:25:47 +0000821 # Check for TypeError on POST data which is str.
822 req = Request("http://example.com/","badpost")
823 self.assertRaises(TypeError, h.do_request_, req)
824
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000825 # check adding of standard headers
826 o.addheaders = [("Spam", "eggs")]
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000827 for data in b"", None: # POST, GET
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000828 req = Request("http://example.com/", data)
829 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000830 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000831 if data is None: # GET
Benjamin Peterson577473f2010-01-19 00:09:57 +0000832 self.assertNotIn("Content-length", req.unredirected_hdrs)
833 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000834 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000835 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
836 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000837 "application/x-www-form-urlencoded")
838 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000839 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
840 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000841
842 # don't clobber existing headers
843 req.add_unredirected_header("Content-length", "foo")
844 req.add_unredirected_header("Content-type", "bar")
845 req.add_unredirected_header("Host", "baz")
846 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000847 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000848 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
849 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000850 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
851 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000852
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000853 # Check iterable body support
854 def iterable_body():
855 yield b"one"
856 yield b"two"
857 yield b"three"
858
859 for headers in {}, {"Content-Length": 11}:
860 req = Request("http://example.com/", iterable_body(), headers)
861 if not headers:
862 # Having an iterable body without a Content-Length should
863 # raise an exception
864 self.assertRaises(ValueError, h.do_request_, req)
865 else:
866 newreq = h.do_request_(req)
867
Senthil Kumaran29333122011-02-11 11:25:47 +0000868 # A file object.
869 # Test only Content-Length attribute of request.
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000870
Senthil Kumaran29333122011-02-11 11:25:47 +0000871 file_obj = io.BytesIO()
872 file_obj.write(b"Something\nSomething\nSomething\n")
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000873
874 for headers in {}, {"Content-Length": 30}:
875 req = Request("http://example.com/", file_obj, headers)
876 if not headers:
877 # Having an iterable body without a Content-Length should
878 # raise an exception
879 self.assertRaises(ValueError, h.do_request_, req)
880 else:
881 newreq = h.do_request_(req)
882 self.assertEqual(int(newreq.get_header('Content-length')),30)
883
884 file_obj.close()
885
886 # array.array Iterable - Content Length is calculated
887
888 iterable_array = array.array("I",[1,2,3,4])
889
890 for headers in {}, {"Content-Length": 16}:
891 req = Request("http://example.com/", iterable_array, headers)
892 newreq = h.do_request_(req)
893 self.assertEqual(int(newreq.get_header('Content-length')),16)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000894
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000895 def test_http_doubleslash(self):
896 # Checks the presence of any unnecessary double slash in url does not
897 # break anything. Previously, a double slash directly after the host
Ezio Melottie130a522011-10-19 10:58:56 +0300898 # could cause incorrect parsing.
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000899 h = urllib.request.AbstractHTTPHandler()
900 o = h.parent = MockOpener()
901
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000902 data = b""
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000903 ds_urls = [
904 "http://example.com/foo/bar/baz.html",
905 "http://example.com//foo/bar/baz.html",
906 "http://example.com/foo//bar/baz.html",
907 "http://example.com/foo/bar//baz.html"
908 ]
909
910 for ds_url in ds_urls:
911 ds_req = Request(ds_url, data)
912
913 # Check whether host is determined correctly if there is no proxy
914 np_ds_req = h.do_request_(ds_req)
915 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
916
917 # Check whether host is determined correctly if there is a proxy
918 ds_req.set_proxy("someproxy:3128",None)
919 p_ds_req = h.do_request_(ds_req)
920 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
921
Senthil Kumaranc2958622010-11-22 04:48:26 +0000922 def test_fixpath_in_weirdurls(self):
923 # Issue4493: urllib2 to supply '/' when to urls where path does not
924 # start with'/'
925
926 h = urllib.request.AbstractHTTPHandler()
927 o = h.parent = MockOpener()
928
929 weird_url = 'http://www.python.org?getspam'
930 req = Request(weird_url)
931 newreq = h.do_request_(req)
932 self.assertEqual(newreq.host,'www.python.org')
933 self.assertEqual(newreq.selector,'/?getspam')
934
935 url_without_path = 'http://www.python.org'
936 req = Request(url_without_path)
937 newreq = h.do_request_(req)
938 self.assertEqual(newreq.host,'www.python.org')
939 self.assertEqual(newreq.selector,'')
940
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000941
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000942 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000943 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000944 o = h.parent = MockOpener()
945
946 url = "http://example.com/"
947 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000948 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000949 r = MockResponse(200, "OK", {}, "", url)
950 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000951 self.assertIs(r, newr)
952 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000953 r = MockResponse(202, "Accepted", {}, "", url)
954 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000955 self.assertIs(r, newr)
956 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000957 r = MockResponse(206, "Partial content", {}, "", url)
958 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000959 self.assertIs(r, newr)
960 self.assertFalse(hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000961 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000962 r = MockResponse(502, "Bad gateway", {}, "", url)
Florent Xicluna419e3842010-08-08 16:16:07 +0000963 self.assertIsNone(h.http_response(req, r))
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000964 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000965 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000966
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000967 def test_cookies(self):
968 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000969 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000970 o = h.parent = MockOpener()
971
972 req = Request("http://example.com/")
973 r = MockResponse(200, "OK", {}, "")
974 newreq = h.http_request(req)
Florent Xicluna419e3842010-08-08 16:16:07 +0000975 self.assertIs(cj.ach_req, req)
976 self.assertIs(cj.ach_req, newreq)
977 self.assertEqual(req.get_origin_req_host(), "example.com")
978 self.assertFalse(req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000979 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000980 self.assertIs(cj.ec_req, req)
981 self.assertIs(cj.ec_r, r)
982 self.assertIs(r, newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000983
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000984 def test_redirect(self):
985 from_url = "http://example.com/a.html"
986 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000987 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000988 o = h.parent = MockOpener()
989
990 # ordinary redirect behaviour
991 for code in 301, 302, 303, 307:
992 for data in None, "blah\nblah\n":
993 method = getattr(h, "http_error_%s" % code)
994 req = Request(from_url, data)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000995 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000996 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +0000997 if data is not None:
998 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000999 req.add_unredirected_header("Spam", "spam")
1000 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001001 method(req, MockFile(), code, "Blah",
1002 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001003 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001004 # 307 in response to POST requires user OK
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001005 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001006 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001007 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001008 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001009 except AttributeError:
Florent Xicluna419e3842010-08-08 16:16:07 +00001010 self.assertFalse(o.req.has_data())
Christian Heimes77c02eb2008-02-09 02:18:51 +00001011
1012 # now it's a GET, there should not be headers regarding content
1013 # (possibly dragged from before being a POST)
1014 headers = [x.lower() for x in o.req.headers]
Benjamin Peterson577473f2010-01-19 00:09:57 +00001015 self.assertNotIn("content-length", headers)
1016 self.assertNotIn("content-type", headers)
Christian Heimes77c02eb2008-02-09 02:18:51 +00001017
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001018 self.assertEqual(o.req.headers["Nonsense"],
1019 "viking=withhold")
Benjamin Peterson577473f2010-01-19 00:09:57 +00001020 self.assertNotIn("Spam", o.req.headers)
1021 self.assertNotIn("Spam", o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001022
1023 # loop detection
1024 req = Request(from_url)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001025 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001026 def redirect(h, req, url=to_url):
1027 h.http_error_302(req, MockFile(), 302, "Blah",
1028 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001029 # Note that the *original* request shares the same record of
1030 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001031
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001032 # detect infinite loop redirect of a URL to itself
1033 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001034 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001035 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001036 try:
1037 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001038 redirect(h, req, "http://example.com/")
1039 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001040 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001041 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001042 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001043
1044 # detect endless non-repeating chain of redirects
1045 req = Request(from_url, origin_req_host="example.com")
1046 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001047 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001048 try:
1049 while 1:
1050 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001051 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001052 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001053 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001054 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001055
guido@google.coma119df92011-03-29 11:41:02 -07001056
1057 def test_invalid_redirect(self):
1058 from_url = "http://example.com/a.html"
1059 valid_schemes = ['http','https','ftp']
1060 invalid_schemes = ['file','imap','ldap']
1061 schemeless_url = "example.com/b.html"
1062 h = urllib.request.HTTPRedirectHandler()
1063 o = h.parent = MockOpener()
1064 req = Request(from_url)
1065 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
1066
1067 for scheme in invalid_schemes:
1068 invalid_url = scheme + '://' + schemeless_url
1069 self.assertRaises(urllib.error.HTTPError, h.http_error_302,
1070 req, MockFile(), 302, "Security Loophole",
1071 MockHeaders({"location": invalid_url}))
1072
1073 for scheme in valid_schemes:
1074 valid_url = scheme + '://' + schemeless_url
1075 h.http_error_302(req, MockFile(), 302, "That's fine",
1076 MockHeaders({"location": valid_url}))
1077 self.assertEqual(o.req.get_full_url(), valid_url)
1078
Senthil Kumaran6497aa32012-01-04 13:46:59 +08001079 def test_relative_redirect(self):
1080 from_url = "http://example.com/a.html"
1081 relative_url = "/b.html"
1082 h = urllib.request.HTTPRedirectHandler()
1083 o = h.parent = MockOpener()
1084 req = Request(from_url)
1085 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
1086
1087 valid_url = urllib.parse.urljoin(from_url,relative_url)
1088 h.http_error_302(req, MockFile(), 302, "That's fine",
1089 MockHeaders({"location": valid_url}))
1090 self.assertEqual(o.req.get_full_url(), valid_url)
1091
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001092 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001093 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +00001094 from http.cookiejar import CookieJar
1095 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001096
1097 cj = CookieJar()
1098 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001099 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001100 hdeh = urllib.request.HTTPDefaultErrorHandler()
1101 hrh = urllib.request.HTTPRedirectHandler()
1102 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001103 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001104 o.open("http://www.example.com/")
Florent Xicluna419e3842010-08-08 16:16:07 +00001105 self.assertFalse(hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001106
Senthil Kumaran26430412011-04-13 07:01:19 +08001107 def test_redirect_fragment(self):
1108 redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
1109 hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
1110 hdeh = urllib.request.HTTPDefaultErrorHandler()
1111 hrh = urllib.request.HTTPRedirectHandler()
1112 o = build_test_opener(hh, hdeh, hrh)
1113 fp = o.open('http://www.example.com')
1114 self.assertEqual(fp.geturl(), redirected_url.strip())
1115
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001116 def test_proxy(self):
1117 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001118 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001119 o.add_handler(ph)
1120 meth_spec = [
1121 [("http_open", "return response")]
1122 ]
1123 handlers = add_ordered_mock_handlers(o, meth_spec)
1124
1125 req = Request("http://acme.example.com/")
1126 self.assertEqual(req.get_host(), "acme.example.com")
1127 r = o.open(req)
1128 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1129
1130 self.assertEqual([(handlers[0], "http_open")],
1131 [tup[0:2] for tup in o.calls])
1132
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001133 def test_proxy_no_proxy(self):
1134 os.environ['no_proxy'] = 'python.org'
1135 o = OpenerDirector()
1136 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1137 o.add_handler(ph)
1138 req = Request("http://www.perl.org/")
1139 self.assertEqual(req.get_host(), "www.perl.org")
1140 r = o.open(req)
1141 self.assertEqual(req.get_host(), "proxy.example.com")
1142 req = Request("http://www.python.org")
1143 self.assertEqual(req.get_host(), "www.python.org")
1144 r = o.open(req)
1145 self.assertEqual(req.get_host(), "www.python.org")
1146 del os.environ['no_proxy']
1147
Ronald Oussorene72e1612011-03-14 18:15:25 -04001148 def test_proxy_no_proxy_all(self):
1149 os.environ['no_proxy'] = '*'
1150 o = OpenerDirector()
1151 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1152 o.add_handler(ph)
1153 req = Request("http://www.python.org")
1154 self.assertEqual(req.get_host(), "www.python.org")
1155 r = o.open(req)
1156 self.assertEqual(req.get_host(), "www.python.org")
1157 del os.environ['no_proxy']
1158
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001159
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001160 def test_proxy_https(self):
1161 o = OpenerDirector()
1162 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
1163 o.add_handler(ph)
1164 meth_spec = [
1165 [("https_open", "return response")]
1166 ]
1167 handlers = add_ordered_mock_handlers(o, meth_spec)
1168
1169 req = Request("https://www.example.com/")
1170 self.assertEqual(req.get_host(), "www.example.com")
1171 r = o.open(req)
1172 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1173 self.assertEqual([(handlers[0], "https_open")],
1174 [tup[0:2] for tup in o.calls])
1175
Senthil Kumaran47fff872009-12-20 07:10:31 +00001176 def test_proxy_https_proxy_authorization(self):
1177 o = OpenerDirector()
1178 ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
1179 o.add_handler(ph)
1180 https_handler = MockHTTPSHandler()
1181 o.add_handler(https_handler)
1182 req = Request("https://www.example.com/")
1183 req.add_header("Proxy-Authorization","FooBar")
1184 req.add_header("User-Agent","Grail")
1185 self.assertEqual(req.get_host(), "www.example.com")
1186 self.assertIsNone(req._tunnel_host)
1187 r = o.open(req)
1188 # Verify Proxy-Authorization gets tunneled to request.
1189 # httpsconn req_headers do not have the Proxy-Authorization header but
1190 # the req will have.
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001191 self.assertNotIn(("Proxy-Authorization","FooBar"),
Senthil Kumaran47fff872009-12-20 07:10:31 +00001192 https_handler.httpconn.req_headers)
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001193 self.assertIn(("User-Agent","Grail"),
1194 https_handler.httpconn.req_headers)
Senthil Kumaran47fff872009-12-20 07:10:31 +00001195 self.assertIsNotNone(req._tunnel_host)
1196 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1197 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001198
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001199 # TODO: This should be only for OSX
1200 @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001201 def test_osx_proxy_bypass(self):
1202 bypass = {
1203 'exclude_simple': False,
1204 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10',
1205 '10.0/16']
1206 }
1207 # Check hosts that should trigger the proxy bypass
1208 for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1',
1209 '10.0.0.1'):
1210 self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass),
1211 'expected bypass of %s to be True' % host)
1212 # Check hosts that should not trigger the proxy bypass
1213 for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'):
1214 self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass),
1215 'expected bypass of %s to be False' % host)
1216
1217 # Check the exclude_simple flag
1218 bypass = {'exclude_simple': True, 'exceptions': []}
1219 self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
1220
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001221 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001222 opener = OpenerDirector()
1223 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001224 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001225 realm = "ACME Widget Store"
1226 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001227 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1228 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001229 opener.add_handler(auth_handler)
1230 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001231 self._test_basic_auth(opener, auth_handler, "Authorization",
1232 realm, http_handler, password_manager,
1233 "http://acme.example.com/protected",
1234 "http://acme.example.com/protected",
1235 )
1236
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001237 def test_basic_auth_with_single_quoted_realm(self):
1238 self.test_basic_auth(quote_char="'")
1239
Thomas Wouters477c8d52006-05-27 19:21:47 +00001240 def test_proxy_basic_auth(self):
1241 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001242 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001243 opener.add_handler(ph)
1244 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001245 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001246 realm = "ACME Networks"
1247 http_handler = MockHTTPHandler(
1248 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001249 opener.add_handler(auth_handler)
1250 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001251 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001252 realm, http_handler, password_manager,
1253 "http://acme.example.com:3128/protected",
1254 "proxy.example.com:3128",
1255 )
1256
1257 def test_basic_and_digest_auth_handlers(self):
1258 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1259 # response (http://python.org/sf/1479302), where it should instead
1260 # return None to allow another handler (especially
1261 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001262
1263 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1264 # try digest first (since it's the strongest auth scheme), so we record
1265 # order of calls here to check digest comes first:
1266 class RecordingOpenerDirector(OpenerDirector):
1267 def __init__(self):
1268 OpenerDirector.__init__(self)
1269 self.recorded = []
1270 def record(self, info):
1271 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001272 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001273 def http_error_401(self, *args, **kwds):
1274 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001275 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001276 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001277 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001278 def http_error_401(self, *args, **kwds):
1279 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001280 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001281 *args, **kwds)
1282
1283 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284 password_manager = MockPasswordManager()
1285 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001286 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001287 realm = "ACME Networks"
1288 http_handler = MockHTTPHandler(
1289 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001290 opener.add_handler(basic_handler)
1291 opener.add_handler(digest_handler)
1292 opener.add_handler(http_handler)
1293
1294 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001295 self._test_basic_auth(opener, basic_handler, "Authorization",
1296 realm, http_handler, password_manager,
1297 "http://acme.example.com/protected",
1298 "http://acme.example.com/protected",
1299 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001300 # check digest was tried before basic (twice, because
1301 # _test_basic_auth called .open() twice)
1302 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001303
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001304 def test_unsupported_auth_digest_handler(self):
1305 opener = OpenerDirector()
1306 # While using DigestAuthHandler
1307 digest_auth_handler = urllib.request.HTTPDigestAuthHandler(None)
1308 http_handler = MockHTTPHandler(
1309 401, 'WWW-Authenticate: Kerberos\r\n\r\n')
1310 opener.add_handler(digest_auth_handler)
1311 opener.add_handler(http_handler)
1312 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1313
1314 def test_unsupported_auth_basic_handler(self):
1315 # While using BasicAuthHandler
1316 opener = OpenerDirector()
1317 basic_auth_handler = urllib.request.HTTPBasicAuthHandler(None)
1318 http_handler = MockHTTPHandler(
1319 401, 'WWW-Authenticate: NTLM\r\n\r\n')
1320 opener.add_handler(basic_auth_handler)
1321 opener.add_handler(http_handler)
1322 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1323
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    """Drive one basic-auth round trip through *opener* and check it.

    Registers fixed credentials on *auth_handler*, opens *request_url*,
    and verifies that the password manager was consulted for *realm* /
    *protected_url*, that exactly two requests were sent, and that only
    the second one carries *auth_header* with the expected Basic value.
    It then clears the stored credentials and checks that a single
    request without the header is made.
    """
    import base64
    user, password = "wile", "coyote"

    # .add_password() must be forwarded verbatim to the password manager.
    auth_handler.add_password(realm, request_url, user, password)
    forwarded = (
        (realm, "realm"),
        (request_url, "url"),
        (user, "user"),
        (password, "password"),
    )
    for expected, attr in forwarded:
        self.assertEqual(expected, getattr(password_manager, attr))

    response = opener.open(request_url)

    # The handler should have looked the credentials up for this
    # realm and URL.
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # One request goes out without authorization, then one with it.
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = ('%s:%s' % (user, password)).encode("ascii")
    token = base64.encodebytes(credentials).strip().decode()
    expected_value = 'Basic ' + token
    retried = http_handler.requests[1]
    self.assertEqual(retried.get_header(auth_header), expected_value)
    self.assertEqual(retried.unredirected_hdrs[auth_header], expected_value)

    # Without a stored password the handler leaves the auth error
    # unhandled, so only the initial request is made.
    password_manager.user = password_manager.password = None
    http_handler.reset()
    response = opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1360
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001361
class MiscTests(unittest.TestCase):
    """Checks of which handlers urllib.request.build_opener() installs."""

    def test_build_opener(self):
        """build_opener() accepts handler classes and instances, and every
        handler passed in ends up installed on the returned opener."""
        http_base = urllib.request.HTTPHandler
        handler_base = urllib.request.BaseHandler

        class MyHTTPHandler(http_base):
            pass

        class FooHandler(handler_base):
            def foo_open(self):
                pass

        class BarHandler(handler_base):
            def bar_open(self):
                pass

        make_opener = urllib.request.build_opener

        def check(built, *expected_classes):
            # Each expected class must be the exact class of some handler.
            for cls in expected_classes:
                self.opener_has_handler(built, cls)

        check(make_opener(FooHandler, BarHandler), FooHandler, BarHandler)

        # can take a mix of classes and instances
        check(make_opener(FooHandler, BarHandler()), FooHandler, BarHandler)

        # subclasses of default handlers override default handlers
        check(make_opener(MyHTTPHandler), MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        check(make_opener(), http_base)
        check(make_opener(http_base), http_base)
        check(make_opener(http_base()), http_base)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(http_base):
            pass

        check(make_opener(MyHTTPHandler, MyOtherHTTPHandler),
              MyHTTPHandler, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Assert that some handler on *opener* has exactly the class
        *handler_class* (instances of subclasses do not count)."""
        exact_matches = [installed for installed in opener.handlers
                         if installed.__class__ == handler_class]
        self.assertTrue(bool(exact_matches))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001404
class RequestTests(unittest.TestCase):
    """Tests for the parsed state exposed by urllib.request.Request.

    The checks read the Request attributes (``data``, ``selector``,
    ``type``, ``host``, ``origin_req_host``) rather than the accessor
    methods ``add_data``/``has_data``/``get_selector``/``get_type``/
    ``get_host``/``get_origin_req_host``: those accessors were deprecated
    in Python 3.3 and removed in 3.4, whereas the attributes are available
    both before and after the removal.
    """

    def setUp(self):
        # One request without a body (GET) and one with a body (POST).
        self.get = Request("http://www.python.org/~jeremy/")
        self.post = Request("http://www.python.org/~jeremy/",
                            "data",
                            headers={"X-Test": "test"})

    def test_method(self):
        """The HTTP method follows from whether the request carries data."""
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        """Attaching data to a request turns a GET into a POST."""
        self.assertIsNone(self.get.data)
        self.assertEqual("GET", self.get.get_method())
        self.get.data = "spam"
        self.assertIsNotNone(self.get.data)
        self.assertEqual("POST", self.get.get_method())

    def test_get_full_url(self):
        """get_full_url() returns the URL the request was built from."""
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        """The selector is the path part of the URL."""
        self.assertEqual("/~jeremy/", self.get.selector)
        req = Request("http://www.python.org/")
        self.assertEqual("/", req.selector)

    def test_get_type(self):
        """The type is the URL scheme."""
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        """The host is the network location of the URL."""
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        """Percent-escapes in the host are decoded."""
        req = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", req.host)

    def test_proxy(self):
        """set_proxy() redirects the host but keeps the origin host."""
        self.assertFalse(self.get.has_proxy())
        self.get.set_proxy("www.perl.org", "http")
        self.assertTrue(self.get.has_proxy())
        self.assertEqual("www.python.org", self.get.origin_req_host)
        self.assertEqual("www.perl.org", self.get.host)

    def test_wrapped_url(self):
        """A URL wrapped as <URL:...> is unwrapped before parsing."""
        req = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", req.host)

    def test_url_fragment(self):
        """The fragment is excluded from the selector but kept in the
        full URL."""
        req = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", req.selector)
        req = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", req.selector)

        # Issue 11703: geturl() omits fragment in the original URL.
        url = 'http://docs.python.org/library/urllib2.html#OK'
        req = Request(url)
        self.assertEqual(req.get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001464
def test_HTTPError_interface():
    """
    Regression doctest for issue 13211: HTTPError is a subclass of
    URLError, yet it did not implement the URLError interface.  The
    examples below check that an HTTPError exposes ``reason`` and that
    it holds the message the error was created with.

    >>> msg = 'something bad happened'
    >>> url = code = hdrs = fp = None
    >>> err = urllib.error.HTTPError(url, code, msg, hdrs, fp)
    >>> assert hasattr(err, 'reason')
    >>> err.reason
    'something bad happened'
    """
1477
def test_main(verbose=None):
    """Run the doctests of test_urllib2 and urllib.request, then the
    unittest classes of this file.

    *verbose* is passed through to support.run_doctest().
    """
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001488
# When executed as a script, run the whole suite with verbose output.
if __name__ == "__main__":
    test_main(verbose=True)