blob: 00ee66987422af240d91e0a93db7c65cb261f123 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +00007import array
Senthil Kumaran4de00a22011-05-11 21:17:57 +08008import sys
Jeremy Hyltone3e61042001-05-09 15:50:25 +00009
Jeremy Hylton1afc1692008-06-18 20:49:58 +000010import urllib.request
Ronald Oussorene72e1612011-03-14 18:15:25 -040011# The proxy bypass method imported below has logic specific to the OSX
12# proxy config data structure but is testable on all platforms.
13from urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf
guido@google.coma119df92011-03-29 11:41:02 -070014import urllib.error
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016# XXX
17# Request
18# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000019# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000020
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000021class TrivialTests(unittest.TestCase):
Senthil Kumaran6c5bd402011-11-01 23:20:31 +080022
23 def test___all__(self):
24 # Verify which names are exposed
25 for module in 'request', 'response', 'parse', 'error', 'robotparser':
26 context = {}
27 exec('from urllib.%s import *' % module, context)
28 del context['__builtins__']
Florent Xicluna3dbb1f12011-11-04 22:15:37 +010029 if module == 'request' and os.name == 'nt':
30 u, p = context.pop('url2pathname'), context.pop('pathname2url')
31 self.assertEqual(u.__module__, 'nturl2path')
32 self.assertEqual(p.__module__, 'nturl2path')
Senthil Kumaran6c5bd402011-11-01 23:20:31 +080033 for k, v in context.items():
34 self.assertEqual(v.__module__, 'urllib.%s' % module,
35 "%r is exposed in 'urllib.%s' but defined in %r" %
36 (k, module, v.__module__))
37
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000038 def test_trivial(self):
39 # A couple trivial tests
Guido van Rossume2ae77b2001-10-24 20:42:55 +000040
Jeremy Hylton1afc1692008-06-18 20:49:58 +000041 self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
Tim Peters861adac2001-07-16 20:49:49 +000042
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000043 # XXX Name hacking to get this to work on Windows.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000044 fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
Senthil Kumarand587e302010-01-10 17:45:52 +000045
Senthil Kumarand587e302010-01-10 17:45:52 +000046 if os.name == 'nt':
47 file_url = "file:///%s" % fname
48 else:
49 file_url = "file://%s" % fname
50
Jeremy Hylton1afc1692008-06-18 20:49:58 +000051 f = urllib.request.urlopen(file_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000052
53 buf = f.read()
54 f.close()
Tim Petersf5f32b42005-07-17 23:16:17 +000055
Georg Brandle1b13d22005-08-24 22:20:32 +000056 def test_parse_http_list(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000057 tests = [
58 ('a,b,c', ['a', 'b', 'c']),
59 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
60 ('a, b, "c", "d", "e,f", g, h',
61 ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
62 ('a="b\\"c", d="e\\,f", g="h\\\\i"',
63 ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
Georg Brandle1b13d22005-08-24 22:20:32 +000064 for string, list in tests:
Florent Xicluna419e3842010-08-08 16:16:07 +000065 self.assertEqual(urllib.request.parse_http_list(string), list)
Georg Brandle1b13d22005-08-24 22:20:32 +000066
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000067
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000068def test_request_headers_dict():
69 """
70 The Request.headers dictionary is not a documented interface. It should
71 stay that way, because the complete set of headers are only accessible
72 through the .get_header(), .has_header(), .header_items() interface.
73 However, .headers pre-dates those methods, and so real code will be using
74 the dictionary.
75
76 The introduction in 2.4 of those methods was a mistake for the same reason:
77 code that previously saw all (urllib2 user)-provided headers in .headers
78 now sees only a subset (and the function interface is ugly and incomplete).
79 A better change would have been to replace .headers dict with a dict
80 subclass (or UserDict.DictMixin instance?) that preserved the .headers
81 interface and also provided access to the "unredirected" headers. It's
82 probably too late to fix that, though.
83
84
85 Check .capitalize() case normalization:
86
87 >>> url = "http://example.com"
88 >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
89 'blah'
90 >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
91 'blah'
92
93 Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
94 but that could be changed in future.
95
96 """
97
98def test_request_headers_methods():
99 """
100 Note the case normalization of header names here, to .capitalize()-case.
101 This should be preserved for backwards-compatibility. (In the HTTP case,
102 normalization to .title()-case is done by urllib2 before sending headers to
Georg Brandl24420152008-05-26 16:32:26 +0000103 http.client).
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000104
105 >>> url = "http://example.com"
106 >>> r = Request(url, headers={"Spam-eggs": "blah"})
107 >>> r.has_header("Spam-eggs")
108 True
109 >>> r.header_items()
110 [('Spam-eggs', 'blah')]
111 >>> r.add_header("Foo-Bar", "baz")
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000112 >>> items = sorted(r.header_items())
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000113 >>> items
114 [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
115
116 Note that e.g. r.has_header("spam-EggS") is currently False, and
117 r.get_header("spam-EggS") returns None, but that could be changed in
118 future.
119
120 >>> r.has_header("Not-there")
121 False
Guido van Rossum7131f842007-02-09 20:13:25 +0000122 >>> print(r.get_header("Not-there"))
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000123 None
124 >>> r.get_header("Not-there", "default")
125 'default'
126
127 """
128
129
Thomas Wouters477c8d52006-05-27 19:21:47 +0000130def test_password_manager(self):
131 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000132 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters477c8d52006-05-27 19:21:47 +0000133 >>> add = mgr.add_password
134 >>> add("Some Realm", "http://example.com/", "joe", "password")
135 >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
136 >>> add("c", "http://example.com/foo", "foo", "ni")
137 >>> add("c", "http://example.com/bar", "bar", "nini")
138 >>> add("b", "http://example.com/", "first", "blah")
139 >>> add("b", "http://example.com/", "second", "spam")
140 >>> add("a", "http://example.com", "1", "a")
141 >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
142 >>> add("Some Realm", "d.example.com", "4", "d")
143 >>> add("Some Realm", "e.example.com:3128", "5", "e")
144
145 >>> mgr.find_user_password("Some Realm", "example.com")
146 ('joe', 'password')
147 >>> mgr.find_user_password("Some Realm", "http://example.com")
148 ('joe', 'password')
149 >>> mgr.find_user_password("Some Realm", "http://example.com/")
150 ('joe', 'password')
151 >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
152 ('joe', 'password')
153 >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
154 ('joe', 'password')
155 >>> mgr.find_user_password("c", "http://example.com/foo")
156 ('foo', 'ni')
157 >>> mgr.find_user_password("c", "http://example.com/bar")
158 ('bar', 'nini')
159
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000160 Actually, this is really undefined ATM
161## Currently, we use the highest-level path where more than one match:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000162
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000163## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
164## ('joe', 'password')
Thomas Wouters477c8d52006-05-27 19:21:47 +0000165
166 Use latest add_password() in case of conflict:
167
168 >>> mgr.find_user_password("b", "http://example.com/")
169 ('second', 'spam')
170
171 No special relationship between a.example.com and example.com:
172
173 >>> mgr.find_user_password("a", "http://example.com/")
174 ('1', 'a')
175 >>> mgr.find_user_password("a", "http://a.example.com/")
176 (None, None)
177
178 Ports:
179
180 >>> mgr.find_user_password("Some Realm", "c.example.com")
181 (None, None)
182 >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
183 ('3', 'c')
184 >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
185 ('3', 'c')
186 >>> mgr.find_user_password("Some Realm", "d.example.com")
187 ('4', 'd')
188 >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
189 ('5', 'e')
190
191 """
192 pass
193
194
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000195def test_password_manager_default_port(self):
196 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000197 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000198 >>> add = mgr.add_password
199
200 The point to note here is that we can't guess the default port if there's
201 no scheme. This applies to both add_password and find_user_password.
202
203 >>> add("f", "http://g.example.com:80", "10", "j")
204 >>> add("g", "http://h.example.com", "11", "k")
205 >>> add("h", "i.example.com:80", "12", "l")
206 >>> add("i", "j.example.com", "13", "m")
207 >>> mgr.find_user_password("f", "g.example.com:100")
208 (None, None)
209 >>> mgr.find_user_password("f", "g.example.com:80")
210 ('10', 'j')
211 >>> mgr.find_user_password("f", "g.example.com")
212 (None, None)
213 >>> mgr.find_user_password("f", "http://g.example.com:100")
214 (None, None)
215 >>> mgr.find_user_password("f", "http://g.example.com:80")
216 ('10', 'j')
217 >>> mgr.find_user_password("f", "http://g.example.com")
218 ('10', 'j')
219 >>> mgr.find_user_password("g", "h.example.com")
220 ('11', 'k')
221 >>> mgr.find_user_password("g", "h.example.com:80")
222 ('11', 'k')
223 >>> mgr.find_user_password("g", "http://h.example.com:80")
224 ('11', 'k')
225 >>> mgr.find_user_password("h", "i.example.com")
226 (None, None)
227 >>> mgr.find_user_password("h", "i.example.com:80")
228 ('12', 'l')
229 >>> mgr.find_user_password("h", "http://i.example.com:80")
230 ('12', 'l')
231 >>> mgr.find_user_password("i", "j.example.com")
232 ('13', 'm')
233 >>> mgr.find_user_password("i", "j.example.com:80")
234 (None, None)
235 >>> mgr.find_user_password("i", "http://j.example.com")
236 ('13', 'm')
237 >>> mgr.find_user_password("i", "http://j.example.com:80")
238 (None, None)
239
240 """
241
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000242class MockOpener:
243 addheaders = []
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000244 def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
245 self.req, self.data, self.timeout = req, data, timeout
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000246 def error(self, proto, *args):
247 self.proto, self.args = proto, args
248
249class MockFile:
250 def read(self, count=None): pass
251 def readline(self, count=None): pass
252 def close(self): pass
253
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000254class MockHeaders(dict):
255 def getheaders(self, name):
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000256 return list(self.values())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000257
Guido van Rossum34d19282007-08-09 01:03:29 +0000258class MockResponse(io.StringIO):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000259 def __init__(self, code, msg, headers, data, url=None):
Guido van Rossum34d19282007-08-09 01:03:29 +0000260 io.StringIO.__init__(self, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000261 self.code, self.msg, self.headers, self.url = code, msg, headers, url
262 def info(self):
263 return self.headers
264 def geturl(self):
265 return self.url
266
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000267class MockCookieJar:
268 def add_cookie_header(self, request):
269 self.ach_req = request
270 def extract_cookies(self, response, request):
271 self.ec_req, self.ec_r = request, response
272
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000273class FakeMethod:
274 def __init__(self, meth_name, action, handle):
275 self.meth_name = meth_name
276 self.handle = handle
277 self.action = action
278 def __call__(self, *args):
279 return self.handle(self.meth_name, self.action, *args)
280
Senthil Kumaran47fff872009-12-20 07:10:31 +0000281class MockHTTPResponse(io.IOBase):
282 def __init__(self, fp, msg, status, reason):
283 self.fp = fp
284 self.msg = msg
285 self.status = status
286 self.reason = reason
287 self.code = 200
288
289 def read(self):
290 return ''
291
292 def info(self):
293 return {}
294
295 def geturl(self):
296 return self.url
297
298
299class MockHTTPClass:
300 def __init__(self):
301 self.level = 0
302 self.req_headers = []
303 self.data = None
304 self.raise_on_endheaders = False
Nadeem Vawdabd26b542012-10-21 17:37:43 +0200305 self.sock = None
Senthil Kumaran47fff872009-12-20 07:10:31 +0000306 self._tunnel_headers = {}
307
308 def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
309 self.host = host
310 self.timeout = timeout
311 return self
312
313 def set_debuglevel(self, level):
314 self.level = level
315
316 def set_tunnel(self, host, port=None, headers=None):
317 self._tunnel_host = host
318 self._tunnel_port = port
319 if headers:
320 self._tunnel_headers = headers
321 else:
322 self._tunnel_headers.clear()
323
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000324 def request(self, method, url, body=None, headers=None):
Senthil Kumaran47fff872009-12-20 07:10:31 +0000325 self.method = method
326 self.selector = url
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000327 if headers is not None:
328 self.req_headers += headers.items()
Senthil Kumaran47fff872009-12-20 07:10:31 +0000329 self.req_headers.sort()
330 if body:
331 self.data = body
332 if self.raise_on_endheaders:
333 import socket
334 raise socket.error()
335 def getresponse(self):
336 return MockHTTPResponse(MockFile(), {}, 200, "OK")
337
Victor Stinnera4c45d72011-06-17 14:01:18 +0200338 def close(self):
339 pass
340
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000341class MockHandler:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000342 # useful for testing handler machinery
343 # see add_ordered_mock_handlers() docstring
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000344 handler_order = 500
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000345 def __init__(self, methods):
346 self._define_methods(methods)
347 def _define_methods(self, methods):
348 for spec in methods:
349 if len(spec) == 2: name, action = spec
350 else: name, action = spec, None
351 meth = FakeMethod(name, action, self.handle)
352 setattr(self.__class__, name, meth)
353 def handle(self, fn_name, action, *args, **kwds):
354 self.parent.calls.append((self, fn_name, args, kwds))
355 if action is None:
356 return None
357 elif action == "return self":
358 return self
359 elif action == "return response":
360 res = MockResponse(200, "OK", {}, "")
361 return res
362 elif action == "return request":
363 return Request("http://blah/")
364 elif action.startswith("error"):
365 code = action[action.rfind(" ")+1:]
366 try:
367 code = int(code)
368 except ValueError:
369 pass
370 res = MockResponse(200, "OK", {}, "")
371 return self.parent.error("http", args[0], res, code, "", {})
372 elif action == "raise":
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000373 raise urllib.error.URLError("blah")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000374 assert False
375 def close(self): pass
376 def add_parent(self, parent):
377 self.parent = parent
378 self.parent.calls = []
379 def __lt__(self, other):
380 if not hasattr(other, "handler_order"):
381 # No handler_order, leave in original order. Yuck.
382 return True
383 return self.handler_order < other.handler_order
384
385def add_ordered_mock_handlers(opener, meth_spec):
386 """Create MockHandlers and add them to an OpenerDirector.
387
388 meth_spec: list of lists of tuples and strings defining methods to define
389 on handlers. eg:
390
391 [["http_error", "ftp_open"], ["http_open"]]
392
393 defines methods .http_error() and .ftp_open() on one handler, and
394 .http_open() on another. These methods just record their arguments and
395 return None. Using a tuple instead of a string causes the method to
396 perform some action (see MockHandler.handle()), eg:
397
398 [["http_error"], [("http_open", "return request")]]
399
400 defines .http_error() on one handler (which simply returns None), and
401 .http_open() on another handler, which returns a Request object.
402
403 """
404 handlers = []
405 count = 0
406 for meths in meth_spec:
407 class MockHandlerSubclass(MockHandler): pass
408 h = MockHandlerSubclass(meths)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000409 h.handler_order += count
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000410 h.add_parent(opener)
411 count = count + 1
412 handlers.append(h)
413 opener.add_handler(h)
414 return handlers
415
Thomas Wouters477c8d52006-05-27 19:21:47 +0000416def build_test_opener(*handler_instances):
417 opener = OpenerDirector()
418 for h in handler_instances:
419 opener.add_handler(h)
420 return opener
421
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000422class MockHTTPHandler(urllib.request.BaseHandler):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000423 # useful for testing redirections and auth
424 # sends supplied headers and code as first response
425 # sends 200 OK as second response
426 def __init__(self, code, headers):
427 self.code = code
428 self.headers = headers
429 self.reset()
430 def reset(self):
431 self._count = 0
432 self.requests = []
433 def http_open(self, req):
Barry Warsaw820c1202008-06-12 04:06:45 +0000434 import email, http.client, copy
Guido van Rossum34d19282007-08-09 01:03:29 +0000435 from io import StringIO
Thomas Wouters477c8d52006-05-27 19:21:47 +0000436 self.requests.append(copy.deepcopy(req))
437 if self._count == 0:
438 self._count = self._count + 1
Georg Brandl24420152008-05-26 16:32:26 +0000439 name = http.client.responses[self.code]
Barry Warsaw820c1202008-06-12 04:06:45 +0000440 msg = email.message_from_string(self.headers)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000441 return self.parent.error(
442 "http", req, MockFile(), self.code, name, msg)
443 else:
444 self.req = req
Barry Warsaw820c1202008-06-12 04:06:45 +0000445 msg = email.message_from_string("\r\n\r\n")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000446 return MockResponse(200, "OK", msg, "", req.get_full_url())
447
Senthil Kumaran47fff872009-12-20 07:10:31 +0000448class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
449 # Useful for testing the Proxy-Authorization request by verifying the
450 # properties of httpcon
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000451
452 def __init__(self):
453 urllib.request.AbstractHTTPHandler.__init__(self)
454 self.httpconn = MockHTTPClass()
455
Senthil Kumaran47fff872009-12-20 07:10:31 +0000456 def https_open(self, req):
457 return self.do_open(self.httpconn, req)
458
Thomas Wouters477c8d52006-05-27 19:21:47 +0000459class MockPasswordManager:
460 def add_password(self, realm, uri, user, password):
461 self.realm = realm
462 self.url = uri
463 self.user = user
464 self.password = password
465 def find_user_password(self, realm, authuri):
466 self.target_realm = realm
467 self.target_url = authuri
468 return self.user, self.password
469
470
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000471class OpenerDirectorTests(unittest.TestCase):
472
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000473 def test_add_non_handler(self):
474 class NonHandler(object):
475 pass
476 self.assertRaises(TypeError,
477 OpenerDirector().add_handler, NonHandler())
478
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000479 def test_badly_named_methods(self):
480 # test work-around for three methods that accidentally follow the
481 # naming conventions for handler methods
482 # (*_open() / *_request() / *_response())
483
484 # These used to call the accidentally-named methods, causing a
485 # TypeError in real code; here, returning self from these mock
486 # methods would either cause no exception, or AttributeError.
487
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000488 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000489
490 o = OpenerDirector()
491 meth_spec = [
492 [("do_open", "return self"), ("proxy_open", "return self")],
493 [("redirect_request", "return self")],
494 ]
495 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000496 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000497 for scheme in "do", "proxy", "redirect":
498 self.assertRaises(URLError, o.open, scheme+"://example.com/")
499
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000500 def test_handled(self):
501 # handler returning non-None means no more handlers will be called
502 o = OpenerDirector()
503 meth_spec = [
504 ["http_open", "ftp_open", "http_error_302"],
505 ["ftp_open"],
506 [("http_open", "return self")],
507 [("http_open", "return self")],
508 ]
509 handlers = add_ordered_mock_handlers(o, meth_spec)
510
511 req = Request("http://example.com/")
512 r = o.open(req)
513 # Second .http_open() gets called, third doesn't, since second returned
514 # non-None. Handlers without .http_open() never get any methods called
515 # on them.
516 # In fact, second mock handler defining .http_open() returns self
517 # (instead of response), which becomes the OpenerDirector's return
518 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000519 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000520 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
521 for expected, got in zip(calls, o.calls):
522 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000523 self.assertEqual((handler, name), expected)
524 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000525
526 def test_handler_order(self):
527 o = OpenerDirector()
528 handlers = []
529 for meths, handler_order in [
530 ([("http_open", "return self")], 500),
531 (["http_open"], 0),
532 ]:
533 class MockHandlerSubclass(MockHandler): pass
534 h = MockHandlerSubclass(meths)
535 h.handler_order = handler_order
536 handlers.append(h)
537 o.add_handler(h)
538
539 r = o.open("http://example.com/")
540 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000541 self.assertEqual(o.calls[0][0], handlers[1])
542 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000543
544 def test_raise(self):
545 # raising URLError stops processing of request
546 o = OpenerDirector()
547 meth_spec = [
548 [("http_open", "raise")],
549 [("http_open", "return self")],
550 ]
551 handlers = add_ordered_mock_handlers(o, meth_spec)
552
553 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000554 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000555 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000556
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000557 def test_http_error(self):
558 # XXX http_error_default
559 # http errors are a special case
560 o = OpenerDirector()
561 meth_spec = [
562 [("http_open", "error 302")],
563 [("http_error_400", "raise"), "http_open"],
564 [("http_error_302", "return response"), "http_error_303",
565 "http_error"],
566 [("http_error_302")],
567 ]
568 handlers = add_ordered_mock_handlers(o, meth_spec)
569
570 class Unknown:
571 def __eq__(self, other): return True
572
573 req = Request("http://example.com/")
574 r = o.open(req)
575 assert len(o.calls) == 2
576 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000577 (handlers[2], "http_error_302",
578 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000579 for expected, got in zip(calls, o.calls):
580 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000581 self.assertEqual((handler, method_name), got[:2])
582 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000583
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700584
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000585 def test_processors(self):
586 # *_request / *_response methods get called appropriately
587 o = OpenerDirector()
588 meth_spec = [
589 [("http_request", "return request"),
590 ("http_response", "return response")],
591 [("http_request", "return request"),
592 ("http_response", "return response")],
593 ]
594 handlers = add_ordered_mock_handlers(o, meth_spec)
595
596 req = Request("http://example.com/")
597 r = o.open(req)
598 # processor methods are called on *all* handlers that define them,
599 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000600 calls = [
601 (handlers[0], "http_request"), (handlers[1], "http_request"),
602 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603
604 for i, (handler, name, args, kwds) in enumerate(o.calls):
605 if i < 2:
606 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000607 self.assertEqual((handler, name), calls[i])
608 self.assertEqual(len(args), 1)
Ezio Melottie9615932010-01-24 19:26:24 +0000609 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000610 else:
611 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000612 self.assertEqual((handler, name), calls[i])
613 self.assertEqual(len(args), 2)
Ezio Melottie9615932010-01-24 19:26:24 +0000614 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000615 # response from opener.open is None, because there's no
616 # handler that defines http_open to handle it
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000617 self.assertTrue(args[1] is None or
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000618 isinstance(args[1], MockResponse))
619
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700620 def test_method_deprecations(self):
621 req = Request("http://www.example.com")
Senthil Kumaran08bd4aa2012-04-11 23:05:49 +0800622
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800623 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700624 req.add_data("data")
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800625 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700626 req.get_data()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800627 with self.assertWarns(DeprecationWarning):
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -0700628 req.has_data()
629 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700630 req.get_host()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800631 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700632 req.get_selector()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800633 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700634 req.is_unverifiable()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800635 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700636 req.get_origin_req_host()
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -0700637 with self.assertWarns(DeprecationWarning):
638 req.get_type()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000639
Senthil Kumaran08bd4aa2012-04-11 23:05:49 +0800640
Tim Peters58eb11c2004-01-18 20:29:55 +0000641def sanepathname2url(path):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000642 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000643 path.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000644 except UnicodeEncodeError:
645 raise unittest.SkipTest("path is not encodable to utf8")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000646 urlpath = urllib.request.pathname2url(path)
Tim Peters58eb11c2004-01-18 20:29:55 +0000647 if os.name == "nt" and urlpath.startswith("///"):
648 urlpath = urlpath[2:]
649 # XXX don't ask me about the mac...
650 return urlpath
651
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000652class HandlerTests(unittest.TestCase):
653
654 def test_ftp(self):
655 class MockFTPWrapper:
656 def __init__(self, data): self.data = data
657 def retrfile(self, filename, filetype):
658 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000659 return io.StringIO(self.data), len(self.data)
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +0200660 def close(self): pass
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000661
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000662 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000663 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000664 def connect_ftp(self, user, passwd, host, port, dirs,
665 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000666 self.user, self.passwd = user, passwd
667 self.host, self.port = host, port
668 self.dirs = dirs
669 self.ftpwrapper = MockFTPWrapper(self.data)
670 return self.ftpwrapper
671
Georg Brandlf78e02b2008-06-10 17:40:04 +0000672 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000673 data = "rheum rhaponicum"
674 h = NullFTPHandler(data)
675 o = h.parent = MockOpener()
676
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000677 for url, host, port, user, passwd, type_, dirs, filename, mimetype in [
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000678 ("ftp://localhost/foo/bar/baz.html",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000679 "localhost", ftplib.FTP_PORT, "", "", "I",
680 ["foo", "bar"], "baz.html", "text/html"),
681 ("ftp://parrot@localhost/foo/bar/baz.html",
682 "localhost", ftplib.FTP_PORT, "parrot", "", "I",
683 ["foo", "bar"], "baz.html", "text/html"),
684 ("ftp://%25parrot@localhost/foo/bar/baz.html",
685 "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
686 ["foo", "bar"], "baz.html", "text/html"),
687 ("ftp://%2542parrot@localhost/foo/bar/baz.html",
688 "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000689 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000690 ("ftp://localhost:80/foo/bar/",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000691 "localhost", 80, "", "", "D",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000692 ["foo", "bar"], "", None),
693 ("ftp://localhost/baz.gif;type=a",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000694 "localhost", ftplib.FTP_PORT, "", "", "A",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000695 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000697 req = Request(url)
698 req.timeout = None
699 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000700 # ftp authentication not yet implemented by FTPHandler
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000701 self.assertEqual(h.user, user)
702 self.assertEqual(h.passwd, passwd)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000703 self.assertEqual(h.host, socket.gethostbyname(host))
704 self.assertEqual(h.port, port)
705 self.assertEqual(h.dirs, dirs)
706 self.assertEqual(h.ftpwrapper.filename, filename)
707 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000708 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000709 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000710 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000711
712 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000713 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000714 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000715 o = h.parent = MockOpener()
716
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000717 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000718 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000719 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000720 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000721 "file://localhost%s" % urlpath,
722 "file://%s" % urlpath,
723 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000724 ]
725 try:
726 localaddr = socket.gethostbyname(socket.gethostname())
727 except socket.gaierror:
728 localaddr = ''
729 if localaddr:
730 urls.append("file://%s%s" % (localaddr, urlpath))
731
732 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000733 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000734 try:
735 try:
736 f.write(towrite)
737 finally:
738 f.close()
739
740 r = h.file_open(Request(url))
741 try:
742 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000743 headers = r.info()
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000744 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000745 finally:
746 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000747 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000748 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000749 finally:
750 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000751 self.assertEqual(data, towrite)
752 self.assertEqual(headers["Content-type"], "text/plain")
753 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000754 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000755 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000756
757 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000758 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000759 "file:///file_does_not_exist.txt",
760 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
761 os.getcwd(), TESTFN),
762 "file://somerandomhost.ontheinternet.com%s/%s" %
763 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000764 ]:
765 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000766 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000767 try:
768 f.write(towrite)
769 finally:
770 f.close()
771
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000772 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000773 h.file_open, Request(url))
774 finally:
775 os.remove(TESTFN)
776
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000777 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000778 o = h.parent = MockOpener()
779 # XXXX why does // mean ftp (and /// mean not ftp!), and where
780 # is file: scheme specified? I think this is really a bug, and
781 # what was intended was to distinguish between URLs like:
782 # file:/blah.txt (a file)
783 # file://localhost/blah.txt (a file)
784 # file:///blah.txt (a file)
785 # file://ftp.example.com/blah.txt (an ftp URL)
786 for url, ftp in [
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000787 ("file://ftp.example.com//foo.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788 ("file://ftp.example.com///foo.txt", False),
789# XXXX bug: fails with OSError, should be URLError
790 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000791 ("file://somehost//foo/something.txt", False),
Senthil Kumaran2ef16322010-07-11 03:12:43 +0000792 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000793 ]:
794 req = Request(url)
795 try:
796 h.file_open(req)
797 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000798 except (urllib.error.URLError, OSError):
Florent Xicluna419e3842010-08-08 16:16:07 +0000799 self.assertFalse(ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000800 else:
Florent Xicluna419e3842010-08-08 16:16:07 +0000801 self.assertIs(o.req, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000802 self.assertEqual(req.type, "ftp")
Łukasz Langad7e81cc2011-01-09 18:18:53 +0000803 self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000804
805 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000806
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000807 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000808 o = h.parent = MockOpener()
809
810 url = "http://example.com/"
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000811 for method, data in [("GET", None), ("POST", b"blah")]:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000812 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000813 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000814 req.add_unredirected_header("Spam", "eggs")
815 http = MockHTTPClass()
816 r = h.do_open(http, req)
817
818 # result attributes
819 r.read; r.readline # wrapped MockFile methods
820 r.info; r.geturl # addinfourl methods
821 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
822 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000823 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000824 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000825
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000826 self.assertEqual(http.host, "example.com")
827 self.assertEqual(http.level, 0)
828 self.assertEqual(http.method, method)
829 self.assertEqual(http.selector, "/")
830 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000831 [("Connection", "close"),
832 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000833 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000834
835 # check socket.error converted to URLError
836 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000837 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000838
Senthil Kumaran29333122011-02-11 11:25:47 +0000839 # Check for TypeError on POST data which is str.
840 req = Request("http://example.com/","badpost")
841 self.assertRaises(TypeError, h.do_request_, req)
842
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000843 # check adding of standard headers
844 o.addheaders = [("Spam", "eggs")]
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000845 for data in b"", None: # POST, GET
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000846 req = Request("http://example.com/", data)
847 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000848 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000849 if data is None: # GET
Benjamin Peterson577473f2010-01-19 00:09:57 +0000850 self.assertNotIn("Content-length", req.unredirected_hdrs)
851 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000852 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000853 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
854 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000855 "application/x-www-form-urlencoded")
856 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000857 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
858 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000859
860 # don't clobber existing headers
861 req.add_unredirected_header("Content-length", "foo")
862 req.add_unredirected_header("Content-type", "bar")
863 req.add_unredirected_header("Host", "baz")
864 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000865 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000866 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
867 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000868 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
869 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000870
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000871 # Check iterable body support
872 def iterable_body():
873 yield b"one"
874 yield b"two"
875 yield b"three"
876
877 for headers in {}, {"Content-Length": 11}:
878 req = Request("http://example.com/", iterable_body(), headers)
879 if not headers:
880 # Having an iterable body without a Content-Length should
881 # raise an exception
882 self.assertRaises(ValueError, h.do_request_, req)
883 else:
884 newreq = h.do_request_(req)
885
Senthil Kumaran29333122011-02-11 11:25:47 +0000886 # A file object.
887 # Test only Content-Length attribute of request.
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000888
Senthil Kumaran29333122011-02-11 11:25:47 +0000889 file_obj = io.BytesIO()
890 file_obj.write(b"Something\nSomething\nSomething\n")
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000891
892 for headers in {}, {"Content-Length": 30}:
893 req = Request("http://example.com/", file_obj, headers)
894 if not headers:
895 # Having an iterable body without a Content-Length should
896 # raise an exception
897 self.assertRaises(ValueError, h.do_request_, req)
898 else:
899 newreq = h.do_request_(req)
900 self.assertEqual(int(newreq.get_header('Content-length')),30)
901
902 file_obj.close()
903
904 # array.array Iterable - Content Length is calculated
905
906 iterable_array = array.array("I",[1,2,3,4])
907
908 for headers in {}, {"Content-Length": 16}:
909 req = Request("http://example.com/", iterable_array, headers)
910 newreq = h.do_request_(req)
911 self.assertEqual(int(newreq.get_header('Content-length')),16)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000912
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000913 def test_http_doubleslash(self):
914 # Checks the presence of any unnecessary double slash in url does not
915 # break anything. Previously, a double slash directly after the host
Ezio Melottie130a522011-10-19 10:58:56 +0300916 # could cause incorrect parsing.
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000917 h = urllib.request.AbstractHTTPHandler()
918 o = h.parent = MockOpener()
919
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000920 data = b""
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000921 ds_urls = [
922 "http://example.com/foo/bar/baz.html",
923 "http://example.com//foo/bar/baz.html",
924 "http://example.com/foo//bar/baz.html",
925 "http://example.com/foo/bar//baz.html"
926 ]
927
928 for ds_url in ds_urls:
929 ds_req = Request(ds_url, data)
930
931 # Check whether host is determined correctly if there is no proxy
932 np_ds_req = h.do_request_(ds_req)
933 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
934
935 # Check whether host is determined correctly if there is a proxy
936 ds_req.set_proxy("someproxy:3128",None)
937 p_ds_req = h.do_request_(ds_req)
938 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
939
Senthil Kumaranc2958622010-11-22 04:48:26 +0000940 def test_fixpath_in_weirdurls(self):
941 # Issue4493: urllib2 to supply '/' when to urls where path does not
942 # start with'/'
943
944 h = urllib.request.AbstractHTTPHandler()
945 o = h.parent = MockOpener()
946
947 weird_url = 'http://www.python.org?getspam'
948 req = Request(weird_url)
949 newreq = h.do_request_(req)
950 self.assertEqual(newreq.host,'www.python.org')
951 self.assertEqual(newreq.selector,'/?getspam')
952
953 url_without_path = 'http://www.python.org'
954 req = Request(url_without_path)
955 newreq = h.do_request_(req)
956 self.assertEqual(newreq.host,'www.python.org')
957 self.assertEqual(newreq.selector,'')
958
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000959
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000960 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000961 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000962 o = h.parent = MockOpener()
963
964 url = "http://example.com/"
965 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000966 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000967 r = MockResponse(200, "OK", {}, "", url)
968 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000969 self.assertIs(r, newr)
970 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000971 r = MockResponse(202, "Accepted", {}, "", url)
972 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000973 self.assertIs(r, newr)
974 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000975 r = MockResponse(206, "Partial content", {}, "", url)
976 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000977 self.assertIs(r, newr)
978 self.assertFalse(hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000979 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000980 r = MockResponse(502, "Bad gateway", {}, "", url)
Florent Xicluna419e3842010-08-08 16:16:07 +0000981 self.assertIsNone(h.http_response(req, r))
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000982 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000983 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000984
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000985 def test_cookies(self):
986 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000987 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000988 o = h.parent = MockOpener()
989
990 req = Request("http://example.com/")
991 r = MockResponse(200, "OK", {}, "")
992 newreq = h.http_request(req)
Florent Xicluna419e3842010-08-08 16:16:07 +0000993 self.assertIs(cj.ach_req, req)
994 self.assertIs(cj.ach_req, newreq)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -0700995 self.assertEqual(req.origin_req_host, "example.com")
996 self.assertFalse(req.unverifiable)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000997 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000998 self.assertIs(cj.ec_req, req)
999 self.assertIs(cj.ec_r, r)
1000 self.assertIs(r, newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001001
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001002 def test_redirect(self):
1003 from_url = "http://example.com/a.html"
1004 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001005 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001006 o = h.parent = MockOpener()
1007
1008 # ordinary redirect behaviour
1009 for code in 301, 302, 303, 307:
1010 for data in None, "blah\nblah\n":
1011 method = getattr(h, "http_error_%s" % code)
1012 req = Request(from_url, data)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001013 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001014 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +00001015 if data is not None:
1016 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001017 req.add_unredirected_header("Spam", "spam")
1018 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001019 method(req, MockFile(), code, "Blah",
1020 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001021 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001022 # 307 in response to POST requires user OK
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001023 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001024 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001025 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001026 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001027 except AttributeError:
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001028 self.assertFalse(o.req.data)
Christian Heimes77c02eb2008-02-09 02:18:51 +00001029
1030 # now it's a GET, there should not be headers regarding content
1031 # (possibly dragged from before being a POST)
1032 headers = [x.lower() for x in o.req.headers]
Benjamin Peterson577473f2010-01-19 00:09:57 +00001033 self.assertNotIn("content-length", headers)
1034 self.assertNotIn("content-type", headers)
Christian Heimes77c02eb2008-02-09 02:18:51 +00001035
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001036 self.assertEqual(o.req.headers["Nonsense"],
1037 "viking=withhold")
Benjamin Peterson577473f2010-01-19 00:09:57 +00001038 self.assertNotIn("Spam", o.req.headers)
1039 self.assertNotIn("Spam", o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001040
1041 # loop detection
1042 req = Request(from_url)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001043 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001044 def redirect(h, req, url=to_url):
1045 h.http_error_302(req, MockFile(), 302, "Blah",
1046 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001047 # Note that the *original* request shares the same record of
1048 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001049
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001050 # detect infinite loop redirect of a URL to itself
1051 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001052 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001053 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001054 try:
1055 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001056 redirect(h, req, "http://example.com/")
1057 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001058 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001059 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001060 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001061
1062 # detect endless non-repeating chain of redirects
1063 req = Request(from_url, origin_req_host="example.com")
1064 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001065 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001066 try:
1067 while 1:
1068 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001069 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001070 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001071 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001072 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001073
guido@google.coma119df92011-03-29 11:41:02 -07001074
1075 def test_invalid_redirect(self):
1076 from_url = "http://example.com/a.html"
1077 valid_schemes = ['http','https','ftp']
1078 invalid_schemes = ['file','imap','ldap']
1079 schemeless_url = "example.com/b.html"
1080 h = urllib.request.HTTPRedirectHandler()
1081 o = h.parent = MockOpener()
1082 req = Request(from_url)
1083 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
1084
1085 for scheme in invalid_schemes:
1086 invalid_url = scheme + '://' + schemeless_url
1087 self.assertRaises(urllib.error.HTTPError, h.http_error_302,
1088 req, MockFile(), 302, "Security Loophole",
1089 MockHeaders({"location": invalid_url}))
1090
1091 for scheme in valid_schemes:
1092 valid_url = scheme + '://' + schemeless_url
1093 h.http_error_302(req, MockFile(), 302, "That's fine",
1094 MockHeaders({"location": valid_url}))
1095 self.assertEqual(o.req.get_full_url(), valid_url)
1096
Senthil Kumaran6497aa32012-01-04 13:46:59 +08001097 def test_relative_redirect(self):
1098 from_url = "http://example.com/a.html"
1099 relative_url = "/b.html"
1100 h = urllib.request.HTTPRedirectHandler()
1101 o = h.parent = MockOpener()
1102 req = Request(from_url)
1103 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
1104
1105 valid_url = urllib.parse.urljoin(from_url,relative_url)
1106 h.http_error_302(req, MockFile(), 302, "That's fine",
1107 MockHeaders({"location": valid_url}))
1108 self.assertEqual(o.req.get_full_url(), valid_url)
1109
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001110 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001111 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +00001112 from http.cookiejar import CookieJar
1113 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001114
1115 cj = CookieJar()
1116 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001117 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001118 hdeh = urllib.request.HTTPDefaultErrorHandler()
1119 hrh = urllib.request.HTTPRedirectHandler()
1120 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001121 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001122 o.open("http://www.example.com/")
Florent Xicluna419e3842010-08-08 16:16:07 +00001123 self.assertFalse(hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001124
Senthil Kumaran26430412011-04-13 07:01:19 +08001125 def test_redirect_fragment(self):
1126 redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
1127 hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
1128 hdeh = urllib.request.HTTPDefaultErrorHandler()
1129 hrh = urllib.request.HTTPRedirectHandler()
1130 o = build_test_opener(hh, hdeh, hrh)
1131 fp = o.open('http://www.example.com')
1132 self.assertEqual(fp.geturl(), redirected_url.strip())
1133
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001134 def test_proxy(self):
1135 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001136 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001137 o.add_handler(ph)
1138 meth_spec = [
1139 [("http_open", "return response")]
1140 ]
1141 handlers = add_ordered_mock_handlers(o, meth_spec)
1142
1143 req = Request("http://acme.example.com/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001144 self.assertEqual(req.host, "acme.example.com")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001145 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001146 self.assertEqual(req.host, "proxy.example.com:3128")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001147
1148 self.assertEqual([(handlers[0], "http_open")],
1149 [tup[0:2] for tup in o.calls])
1150
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001151 def test_proxy_no_proxy(self):
1152 os.environ['no_proxy'] = 'python.org'
1153 o = OpenerDirector()
1154 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1155 o.add_handler(ph)
1156 req = Request("http://www.perl.org/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001157 self.assertEqual(req.host, "www.perl.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001158 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001159 self.assertEqual(req.host, "proxy.example.com")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001160 req = Request("http://www.python.org")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001161 self.assertEqual(req.host, "www.python.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001162 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001163 self.assertEqual(req.host, "www.python.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001164 del os.environ['no_proxy']
1165
Ronald Oussorene72e1612011-03-14 18:15:25 -04001166 def test_proxy_no_proxy_all(self):
1167 os.environ['no_proxy'] = '*'
1168 o = OpenerDirector()
1169 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1170 o.add_handler(ph)
1171 req = Request("http://www.python.org")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001172 self.assertEqual(req.host, "www.python.org")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001173 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001174 self.assertEqual(req.host, "www.python.org")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001175 del os.environ['no_proxy']
1176
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001177
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001178 def test_proxy_https(self):
1179 o = OpenerDirector()
1180 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
1181 o.add_handler(ph)
1182 meth_spec = [
1183 [("https_open", "return response")]
1184 ]
1185 handlers = add_ordered_mock_handlers(o, meth_spec)
1186
1187 req = Request("https://www.example.com/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001188 self.assertEqual(req.host, "www.example.com")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001189 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001190 self.assertEqual(req.host, "proxy.example.com:3128")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001191 self.assertEqual([(handlers[0], "https_open")],
1192 [tup[0:2] for tup in o.calls])
1193
Senthil Kumaran47fff872009-12-20 07:10:31 +00001194 def test_proxy_https_proxy_authorization(self):
1195 o = OpenerDirector()
1196 ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
1197 o.add_handler(ph)
1198 https_handler = MockHTTPSHandler()
1199 o.add_handler(https_handler)
1200 req = Request("https://www.example.com/")
1201 req.add_header("Proxy-Authorization","FooBar")
1202 req.add_header("User-Agent","Grail")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001203 self.assertEqual(req.host, "www.example.com")
Senthil Kumaran47fff872009-12-20 07:10:31 +00001204 self.assertIsNone(req._tunnel_host)
1205 r = o.open(req)
1206 # Verify Proxy-Authorization gets tunneled to request.
1207 # httpsconn req_headers do not have the Proxy-Authorization header but
1208 # the req will have.
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001209 self.assertNotIn(("Proxy-Authorization","FooBar"),
Senthil Kumaran47fff872009-12-20 07:10:31 +00001210 https_handler.httpconn.req_headers)
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001211 self.assertIn(("User-Agent","Grail"),
1212 https_handler.httpconn.req_headers)
Senthil Kumaran47fff872009-12-20 07:10:31 +00001213 self.assertIsNotNone(req._tunnel_host)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001214 self.assertEqual(req.host, "proxy.example.com:3128")
Senthil Kumaran47fff872009-12-20 07:10:31 +00001215 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001216
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001217 # TODO: This should be only for OSX
1218 @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001219 def test_osx_proxy_bypass(self):
1220 bypass = {
1221 'exclude_simple': False,
1222 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10',
1223 '10.0/16']
1224 }
1225 # Check hosts that should trigger the proxy bypass
1226 for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1',
1227 '10.0.0.1'):
1228 self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass),
1229 'expected bypass of %s to be True' % host)
1230 # Check hosts that should not trigger the proxy bypass
1231 for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'):
1232 self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass),
1233 'expected bypass of %s to be False' % host)
1234
1235 # Check the exclude_simple flag
1236 bypass = {'exclude_simple': True, 'exceptions': []}
1237 self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
1238
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001239 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001240 opener = OpenerDirector()
1241 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001242 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001243 realm = "ACME Widget Store"
1244 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001245 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1246 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001247 opener.add_handler(auth_handler)
1248 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001249 self._test_basic_auth(opener, auth_handler, "Authorization",
1250 realm, http_handler, password_manager,
1251 "http://acme.example.com/protected",
1252 "http://acme.example.com/protected",
1253 )
1254
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001255 def test_basic_auth_with_single_quoted_realm(self):
1256 self.test_basic_auth(quote_char="'")
1257
Senthil Kumaran34f3fcc2012-05-15 22:30:25 +08001258 def test_basic_auth_with_unquoted_realm(self):
1259 opener = OpenerDirector()
1260 password_manager = MockPasswordManager()
1261 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
1262 realm = "ACME Widget Store"
1263 http_handler = MockHTTPHandler(
1264 401, 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm)
1265 opener.add_handler(auth_handler)
1266 opener.add_handler(http_handler)
Senthil Kumaran0ea91cb2012-05-15 23:59:42 +08001267 with self.assertWarns(UserWarning):
1268 self._test_basic_auth(opener, auth_handler, "Authorization",
1269 realm, http_handler, password_manager,
1270 "http://acme.example.com/protected",
1271 "http://acme.example.com/protected",
1272 )
Senthil Kumaran34f3fcc2012-05-15 22:30:25 +08001273
Thomas Wouters477c8d52006-05-27 19:21:47 +00001274 def test_proxy_basic_auth(self):
1275 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001276 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 opener.add_handler(ph)
1278 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001279 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001280 realm = "ACME Networks"
1281 http_handler = MockHTTPHandler(
1282 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001283 opener.add_handler(auth_handler)
1284 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001285 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001286 realm, http_handler, password_manager,
1287 "http://acme.example.com:3128/protected",
1288 "proxy.example.com:3128",
1289 )
1290
1291 def test_basic_and_digest_auth_handlers(self):
1292 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1293 # response (http://python.org/sf/1479302), where it should instead
1294 # return None to allow another handler (especially
1295 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001296
1297 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1298 # try digest first (since it's the strongest auth scheme), so we record
1299 # order of calls here to check digest comes first:
1300 class RecordingOpenerDirector(OpenerDirector):
1301 def __init__(self):
1302 OpenerDirector.__init__(self)
1303 self.recorded = []
1304 def record(self, info):
1305 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001306 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001307 def http_error_401(self, *args, **kwds):
1308 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001309 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001310 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001311 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001312 def http_error_401(self, *args, **kwds):
1313 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001314 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001315 *args, **kwds)
1316
1317 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001318 password_manager = MockPasswordManager()
1319 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001320 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001321 realm = "ACME Networks"
1322 http_handler = MockHTTPHandler(
1323 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001324 opener.add_handler(basic_handler)
1325 opener.add_handler(digest_handler)
1326 opener.add_handler(http_handler)
1327
1328 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001329 self._test_basic_auth(opener, basic_handler, "Authorization",
1330 realm, http_handler, password_manager,
1331 "http://acme.example.com/protected",
1332 "http://acme.example.com/protected",
1333 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001334 # check digest was tried before basic (twice, because
1335 # _test_basic_auth called .open() twice)
1336 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001337
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001338 def test_unsupported_auth_digest_handler(self):
1339 opener = OpenerDirector()
1340 # While using DigestAuthHandler
1341 digest_auth_handler = urllib.request.HTTPDigestAuthHandler(None)
1342 http_handler = MockHTTPHandler(
1343 401, 'WWW-Authenticate: Kerberos\r\n\r\n')
1344 opener.add_handler(digest_auth_handler)
1345 opener.add_handler(http_handler)
1346 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1347
1348 def test_unsupported_auth_basic_handler(self):
1349 # While using BasicAuthHandler
1350 opener = OpenerDirector()
1351 basic_auth_handler = urllib.request.HTTPBasicAuthHandler(None)
1352 http_handler = MockHTTPHandler(
1353 401, 'WWW-Authenticate: NTLM\r\n\r\n')
1354 opener.add_handler(basic_auth_handler)
1355 opener.add_handler(http_handler)
1356 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1357
Thomas Wouters477c8d52006-05-27 19:21:47 +00001358 def _test_basic_auth(self, opener, auth_handler, auth_header,
1359 realm, http_handler, password_manager,
1360 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001361 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001362 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001363
1364 # .add_password() fed through to password manager
1365 auth_handler.add_password(realm, request_url, user, password)
1366 self.assertEqual(realm, password_manager.realm)
1367 self.assertEqual(request_url, password_manager.url)
1368 self.assertEqual(user, password_manager.user)
1369 self.assertEqual(password, password_manager.password)
1370
1371 r = opener.open(request_url)
1372
1373 # should have asked the password manager for the username/password
1374 self.assertEqual(password_manager.target_realm, realm)
1375 self.assertEqual(password_manager.target_url, protected_url)
1376
1377 # expect one request without authorization, then one with
1378 self.assertEqual(len(http_handler.requests), 2)
1379 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001380 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001381 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001382 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001383 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1384 auth_hdr_value)
Senthil Kumaranca2fc9e2010-02-24 16:53:16 +00001385 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1386 auth_hdr_value)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001387 # if the password manager can't find a password, the handler won't
1388 # handle the HTTP auth error
1389 password_manager.user = password_manager.password = None
1390 http_handler.reset()
1391 r = opener.open(request_url)
1392 self.assertEqual(len(http_handler.requests), 1)
1393 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1394
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001395
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001396class MiscTests(unittest.TestCase):
1397
1398 def test_build_opener(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001399 class MyHTTPHandler(urllib.request.HTTPHandler): pass
1400 class FooHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001401 def foo_open(self): pass
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001402 class BarHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001403 def bar_open(self): pass
1404
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001405 build_opener = urllib.request.build_opener
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001406
1407 o = build_opener(FooHandler, BarHandler)
1408 self.opener_has_handler(o, FooHandler)
1409 self.opener_has_handler(o, BarHandler)
1410
1411 # can take a mix of classes and instances
1412 o = build_opener(FooHandler, BarHandler())
1413 self.opener_has_handler(o, FooHandler)
1414 self.opener_has_handler(o, BarHandler)
1415
1416 # subclasses of default handlers override default handlers
1417 o = build_opener(MyHTTPHandler)
1418 self.opener_has_handler(o, MyHTTPHandler)
1419
1420 # a particular case of overriding: default handlers can be passed
1421 # in explicitly
1422 o = build_opener()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001423 self.opener_has_handler(o, urllib.request.HTTPHandler)
1424 o = build_opener(urllib.request.HTTPHandler)
1425 self.opener_has_handler(o, urllib.request.HTTPHandler)
1426 o = build_opener(urllib.request.HTTPHandler())
1427 self.opener_has_handler(o, urllib.request.HTTPHandler)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001428
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001429 # Issue2670: multiple handlers sharing the same base class
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001430 class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001431 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1432 self.opener_has_handler(o, MyHTTPHandler)
1433 self.opener_has_handler(o, MyOtherHTTPHandler)
1434
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001435 def opener_has_handler(self, opener, handler_class):
Florent Xicluna419e3842010-08-08 16:16:07 +00001436 self.assertTrue(any(h.__class__ == handler_class
1437 for h in opener.handlers))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001438
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001439class RequestTests(unittest.TestCase):
1440
1441 def setUp(self):
1442 self.get = Request("http://www.python.org/~jeremy/")
1443 self.post = Request("http://www.python.org/~jeremy/",
1444 "data",
1445 headers={"X-Test": "test"})
1446
1447 def test_method(self):
1448 self.assertEqual("POST", self.post.get_method())
1449 self.assertEqual("GET", self.get.get_method())
1450
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001451 def test_data(self):
1452 self.assertFalse(self.get.data)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001453 self.assertEqual("GET", self.get.get_method())
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001454 self.get.data = "spam"
1455 self.assertTrue(self.get.data)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001456 self.assertEqual("POST", self.get.get_method())
1457
1458 def test_get_full_url(self):
1459 self.assertEqual("http://www.python.org/~jeremy/",
1460 self.get.get_full_url())
1461
1462 def test_selector(self):
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001463 self.assertEqual("/~jeremy/", self.get.selector)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001464 req = Request("http://www.python.org/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001465 self.assertEqual("/", req.selector)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001466
1467 def test_get_type(self):
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001468 self.assertEqual("http", self.get.type)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001469
1470 def test_get_host(self):
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001471 self.assertEqual("www.python.org", self.get.host)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001472
1473 def test_get_host_unquote(self):
1474 req = Request("http://www.%70ython.org/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001475 self.assertEqual("www.python.org", req.host)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001476
1477 def test_proxy(self):
Florent Xicluna419e3842010-08-08 16:16:07 +00001478 self.assertFalse(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001479 self.get.set_proxy("www.perl.org", "http")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001480 self.assertTrue(self.get.has_proxy())
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001481 self.assertEqual("www.python.org", self.get.origin_req_host)
1482 self.assertEqual("www.perl.org", self.get.host)
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001483
Senthil Kumarand95cc752010-08-08 11:27:53 +00001484 def test_wrapped_url(self):
1485 req = Request("<URL:http://www.python.org>")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001486 self.assertEqual("www.python.org", req.host)
Senthil Kumarand95cc752010-08-08 11:27:53 +00001487
Senthil Kumaran26430412011-04-13 07:01:19 +08001488 def test_url_fragment(self):
Senthil Kumarand95cc752010-08-08 11:27:53 +00001489 req = Request("http://www.python.org/?qs=query#fragment=true")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001490 self.assertEqual("/?qs=query", req.selector)
Senthil Kumarand95cc752010-08-08 11:27:53 +00001491 req = Request("http://www.python.org/#fun=true")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001492 self.assertEqual("/", req.selector)
Senthil Kumarand95cc752010-08-08 11:27:53 +00001493
Senthil Kumaran26430412011-04-13 07:01:19 +08001494 # Issue 11703: geturl() omits fragment in the original URL.
1495 url = 'http://docs.python.org/library/urllib2.html#OK'
1496 req = Request(url)
1497 self.assertEqual(req.get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001498
Jason R. Coombsaa204db2011-11-07 10:50:32 -05001499def test_HTTPError_interface():
1500 """
1501 Issue 13211 reveals that HTTPError didn't implement the URLError
1502 interface even though HTTPError is a subclass of URLError.
1503
Jason R. Coombs7ff21d72011-12-03 23:18:11 -05001504 >>> msg = 'something bad happened'
1505 >>> url = code = hdrs = fp = None
Jason R. Coombs9c3895f2011-12-04 08:14:18 -05001506 >>> err = urllib.error.HTTPError(url, code, msg, hdrs, fp)
Jason R. Coombsaa204db2011-11-07 10:50:32 -05001507 >>> assert hasattr(err, 'reason')
1508 >>> err.reason
1509 'something bad happened'
1510 """
1511
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001512def test_main(verbose=None):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001513 from test import test_urllib2
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001514 support.run_doctest(test_urllib2, verbose)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001515 support.run_doctest(urllib.request, verbose)
Andrew M. Kuchlingbd3200f2004-06-29 13:15:46 +00001516 tests = (TrivialTests,
1517 OpenerDirectorTests,
1518 HandlerTests,
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001519 MiscTests,
1520 RequestTests)
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001521 support.run_unittest(*tests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001522
1523if __name__ == "__main__":
1524 test_main(verbose=True)