blob: 83bb0a9cb209eb5ce3dd341de889d451121fc6a3 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +00007import array
Jeremy Hyltone3e61042001-05-09 15:50:25 +00008
Jeremy Hylton1afc1692008-06-18 20:49:58 +00009import urllib.request
Ronald Oussorene72e1612011-03-14 18:15:25 -040010# The proxy bypass method imported below has logic specific to the OSX
11# proxy config data structure but is testable on all platforms.
12from urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf
guido@google.coma119df92011-03-29 11:41:02 -070013import urllib.error
Jeremy Hyltone3e61042001-05-09 15:50:25 +000014
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000015# XXX
16# Request
17# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000018# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000019
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a malformed URL and can read a local file: URL."""
        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is irrelevant.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000049
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000050
def test_request_headers_dict():
    """
    Doctest for the undocumented Request.headers dictionary.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
80
def test_request_headers_methods():
    """
    Doctest for the Request header accessor methods.

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
111
112
def test_password_manager(self):
    """
    Doctest for HTTPPasswordMgr.add_password() / find_user_password().

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM (the example below is therefore
    disabled with a leading "##" so that doctest does not run it):
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
176
177
def test_password_manager_default_port(self):
    """
    Doctest for HTTPPasswordMgr handling of default and explicit ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
224
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Remember the request, data and timeout of the latest open()."""
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        """Remember the protocol and remaining arguments of the latest error()."""
        self.proto = proto
        self.args = args
231
class MockFile:
    """File-like stub: every method accepts its arguments and returns None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
236
class MockHeaders(dict):
    """dict subclass offering a getheaders() method."""

    def getheaders(self, name):
        """Return every stored value as a list; *name* is not consulted."""
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000240
class MockResponse(io.StringIO):
    """In-memory text response carrying a status code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object supplied to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL supplied to the constructor (None by default)."""
        return self.url
249
class MockCookieJar:
    """Cookie-jar stand-in that records the objects passed to it."""

    def add_cookie_header(self, request):
        """Store the request on ``ach_req``."""
        self.ach_req = request

    def extract_cookies(self, response, request):
        """Store the request on ``ec_req`` and the response on ``ec_r``."""
        self.ec_req = request
        self.ec_r = response
255
class FakeMethod:
    """Callable that forwards to *handle*, prefixed with a method name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # Delegate as handle(meth_name, action, *positional_args).
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
263
class MockHTTPResponse(io.IOBase):
    """Minimal HTTP response object with an empty body and no headers."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason
        # The code attribute is always 200, independent of *status*.
        self.code = 200

    def read(self):
        """Return an empty string."""
        return ""

    def info(self):
        """Return a new empty dict."""
        return dict()

    def geturl(self):
        """Return ``self.url``; this class itself never assigns that attribute."""
        return self.url
280
281
class MockHTTPClass:
    """Connection-class stand-in: calling the instance returns the instance.

    Everything passed to it (host, timeout, tunnel settings, request line,
    headers and body) is stored on attributes for later inspection.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Record *host* and *timeout*, then return this same object."""
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        """Record the tunnel target; falsy *headers* empties the stored dict."""
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        """Record the request; raise socket.error if raise_on_endheaders is set."""
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Keep the accumulated (name, value) pairs sorted.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        """Return a fresh MockHTTPResponse built with status 200 / "OK"."""
        return MockHTTPResponse(MockFile(), {}, 200, "OK")

    def close(self):
        return None
322
class MockHandler:
    """Handler whose methods are generated from a list of specs.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.  Each
    generated method records its call on ``self.parent.calls`` and then
    performs the action named in its spec.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per spec on this instance's class."""
        for spec in methods:
            # A length-2 spec is unpacked as (name, action); anything else is
            # taken to be a bare method name.  NOTE: a bare name that is
            # exactly two characters long would be unpacked as well.
            name, action = spec if len(spec) == 2 else (spec, None)
            # The method is set on the class, not the instance.
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then carry out *action*."""
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The text after the last space is the error code; it is kept
            # as a string when it is not an integer.
            code = action.rpartition(" ")[2]
            try:
                code = int(code)
            except ValueError:
                pass
            response = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], response, code, "", {})
        if action == "raise":
            raise urllib.error.URLError("blah")
        assert False

    def close(self):
        return None

    def add_parent(self, parent):
        """Attach *parent* and reset its ``calls`` list to empty."""
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
366
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in meth_spec order.

    """
    created = []
    for position, method_specs in enumerate(meth_spec):
        # A new subclass per handler: MockHandler installs its generated
        # methods on the class, so each handler needs a class of its own.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(method_specs)
        # Give later handlers a higher handler_order than earlier ones.
        handler.handler_order = handler.handler_order + position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
397
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
403
class MockHTTPHandler(urllib.request.BaseHandler):
    """Handler for testing redirections and auth.

    The first http_open() reports the configured code and headers through
    the parent's error(); every later call returns a 200 OK MockResponse.
    """

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        """Forget all recorded requests and start again at the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import email
        import http.client

        # Store a deep copy of every request that is seen.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            self.req = req
            blank = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", blank, "", req.get_full_url())

        self._count += 1
        reason = http.client.responses[self.code]
        parsed_headers = email.message_from_string(self.headers)
        return self.parent.error(
            "http", req, MockFile(), self.code, reason, parsed_headers)
429
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    """HTTPS handler backed by a MockHTTPClass connection.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of ``httpconn`` afterwards.
    """

    def __init__(self):
        super().__init__()
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        """Run do_open() with the mock connection in place of a real one."""
        connection = self.httpconn
        return self.do_open(connection, req)
440
class MockPasswordManager:
    """Password manager stand-in that stores one credential set.

    It also records the realm and URI of the most recent lookup.
    """

    def add_password(self, realm, uri, user, password):
        """Store the single (realm, uri, user, password) entry."""
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        """Record the lookup target and return the stored (user, password)."""
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
451
452
class OpenerDirectorTests(unittest.TestCase):
    """Tests for how OpenerDirector dispatches calls to its handlers."""

    def test_add_non_handler(self):
        """Adding an object that is not a handler raises TypeError."""
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(urllib.error.URLError,
                              o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the count explicitly;
        # otherwise missing or extra calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        """Handlers are called in handler_order order, not insertion order."""
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands in for the response object
            # whose identity the test does not care about.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # assertEqual rather than a bare assert: it is not stripped under -O
        # and reports both values on failure.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this check an empty o.calls would make the loop below
        # pass without asserting anything.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            self.assertEqual((handler, name), calls[i])
            if i < 2:
                # *_request
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
604
605
def sanepathname2url(path):
    """Convert *path* with pathname2url(), trimming a leading '///' on Windows.

    Raises unittest.SkipTest when *path* cannot be encoded as UTF-8.
    """
    try:
        path.encode("utf8")
    except UnicodeEncodeError:
        raise unittest.SkipTest("path is not encodable to utf8")
    converted = urllib.request.pathname2url(path)
    # On Windows a result starting with '///' loses its first two slashes.
    # XXX don't ask me about the mac...
    trim_slashes = os.name == "nt" and converted.startswith("///")
    return converted[2:] if trim_slashes else converted
616
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000617class HandlerTests(unittest.TestCase):
618
def test_ftp(self):
    """ftp_open() passes the parsed URL parts on and builds the headers.

    A handler subclass records what connect_ftp() receives and hands back
    a wrapper that records what retrfile() receives, so no real FTP
    connection is made.
    """
    import ftplib

    class MockFTPWrapper:
        def __init__(self, data):
            self.data = data

        def retrfile(self, filename, filetype):
            # Remember what was asked for; return the canned data + length.
            self.filename = filename
            self.filetype = filetype
            return io.StringIO(self.data), len(self.data)

        def close(self):
            pass

    class NullFTPHandler(urllib.request.FTPHandler):
        def __init__(self, data):
            self.data = data

        def connect_ftp(self, user, passwd, host, port, dirs,
                        timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            # Record the connection parameters instead of connecting.
            self.user = user
            self.passwd = passwd
            self.host = host
            self.port = port
            self.dirs = dirs
            self.ftpwrapper = MockFTPWrapper(self.data)
            return self.ftpwrapper

    data = "rheum rhaponicum"
    h = NullFTPHandler(data)
    h.parent = MockOpener()

    # (url, host, port, user, passwd, type, dirs, filename, mimetype)
    cases = [
        ("ftp://localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "", "", "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://parrot@localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "parrot", "", "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://%25parrot@localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://%2542parrot@localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://localhost:80/foo/bar/",
         "localhost", 80, "", "", "D",
         ["foo", "bar"], "", None),
        ("ftp://localhost/baz.gif;type=a",
         "localhost", ftplib.FTP_PORT, "", "", "A",
         [], "baz.gif", None),  # XXX really this should guess image/gif
    ]
    for case in cases:
        (url, host, port, user, passwd,
         type_, dirs, filename, mimetype) = case
        req = Request(url)
        req.timeout = None
        response = h.ftp_open(req)
        self.assertEqual(h.user, user)
        self.assertEqual(h.passwd, passwd)
        # The host is compared against its resolved address.
        self.assertEqual(h.host, socket.gethostbyname(host))
        self.assertEqual(h.port, port)
        self.assertEqual(h.dirs, dirs)
        self.assertEqual(h.ftpwrapper.filename, filename)
        self.assertEqual(h.ftpwrapper.filetype, type_)
        headers = response.info()
        self.assertEqual(headers.get("Content-type"), mimetype)
        self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000676
677 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000678 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000679 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000680 o = h.parent = MockOpener()
681
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000682 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000683 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000684 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000685 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000686 "file://localhost%s" % urlpath,
687 "file://%s" % urlpath,
688 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000689 ]
690 try:
691 localaddr = socket.gethostbyname(socket.gethostname())
692 except socket.gaierror:
693 localaddr = ''
694 if localaddr:
695 urls.append("file://%s%s" % (localaddr, urlpath))
696
697 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000698 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000699 try:
700 try:
701 f.write(towrite)
702 finally:
703 f.close()
704
705 r = h.file_open(Request(url))
706 try:
707 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000708 headers = r.info()
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000709 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000710 finally:
711 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000712 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000713 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000714 finally:
715 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000716 self.assertEqual(data, towrite)
717 self.assertEqual(headers["Content-type"], "text/plain")
718 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000719 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000720 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000721
722 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000723 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000724 "file:///file_does_not_exist.txt",
725 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
726 os.getcwd(), TESTFN),
727 "file://somerandomhost.ontheinternet.com%s/%s" %
728 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000729 ]:
730 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000731 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000732 try:
733 f.write(towrite)
734 finally:
735 f.close()
736
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000737 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000738 h.file_open, Request(url))
739 finally:
740 os.remove(TESTFN)
741
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000742 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000743 o = h.parent = MockOpener()
744 # XXXX why does // mean ftp (and /// mean not ftp!), and where
745 # is file: scheme specified? I think this is really a bug, and
746 # what was intended was to distinguish between URLs like:
747 # file:/blah.txt (a file)
748 # file://localhost/blah.txt (a file)
749 # file:///blah.txt (a file)
750 # file://ftp.example.com/blah.txt (an ftp URL)
751 for url, ftp in [
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000752 ("file://ftp.example.com//foo.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000753 ("file://ftp.example.com///foo.txt", False),
754# XXXX bug: fails with OSError, should be URLError
755 ("file://ftp.example.com/foo.txt", False),
Senthil Kumaran383c32d2010-10-14 11:57:35 +0000756 ("file://somehost//foo/something.txt", False),
Senthil Kumaran2ef16322010-07-11 03:12:43 +0000757 ("file://localhost//foo/something.txt", False),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000758 ]:
759 req = Request(url)
760 try:
761 h.file_open(req)
762 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000763 except (urllib.error.URLError, OSError):
Florent Xicluna419e3842010-08-08 16:16:07 +0000764 self.assertFalse(ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000765 else:
Florent Xicluna419e3842010-08-08 16:16:07 +0000766 self.assertIs(o.req, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000767 self.assertEqual(req.type, "ftp")
Łukasz Langad7e81cc2011-01-09 18:18:53 +0000768 self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000769
770 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000771
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000772 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000773 o = h.parent = MockOpener()
774
775 url = "http://example.com/"
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000776 for method, data in [("GET", None), ("POST", b"blah")]:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000777 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000778 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000779 req.add_unredirected_header("Spam", "eggs")
780 http = MockHTTPClass()
781 r = h.do_open(http, req)
782
783 # result attributes
784 r.read; r.readline # wrapped MockFile methods
785 r.info; r.geturl # addinfourl methods
786 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
787 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000788 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000789 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000790
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000791 self.assertEqual(http.host, "example.com")
792 self.assertEqual(http.level, 0)
793 self.assertEqual(http.method, method)
794 self.assertEqual(http.selector, "/")
795 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000796 [("Connection", "close"),
797 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000798 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000799
800 # check socket.error converted to URLError
801 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000802 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000803
Senthil Kumaran29333122011-02-11 11:25:47 +0000804 # Check for TypeError on POST data which is str.
805 req = Request("http://example.com/","badpost")
806 self.assertRaises(TypeError, h.do_request_, req)
807
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000808 # check adding of standard headers
809 o.addheaders = [("Spam", "eggs")]
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000810 for data in b"", None: # POST, GET
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000811 req = Request("http://example.com/", data)
812 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000813 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000814 if data is None: # GET
Benjamin Peterson577473f2010-01-19 00:09:57 +0000815 self.assertNotIn("Content-length", req.unredirected_hdrs)
816 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000817 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000818 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
819 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000820 "application/x-www-form-urlencoded")
821 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000822 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
823 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000824
825 # don't clobber existing headers
826 req.add_unredirected_header("Content-length", "foo")
827 req.add_unredirected_header("Content-type", "bar")
828 req.add_unredirected_header("Host", "baz")
829 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000830 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000831 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
832 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000833 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
834 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000835
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000836 # Check iterable body support
837 def iterable_body():
838 yield b"one"
839 yield b"two"
840 yield b"three"
841
842 for headers in {}, {"Content-Length": 11}:
843 req = Request("http://example.com/", iterable_body(), headers)
844 if not headers:
845 # Having an iterable body without a Content-Length should
846 # raise an exception
847 self.assertRaises(ValueError, h.do_request_, req)
848 else:
849 newreq = h.do_request_(req)
850
Senthil Kumaran29333122011-02-11 11:25:47 +0000851 # A file object.
852 # Test only Content-Length attribute of request.
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000853
Senthil Kumaran29333122011-02-11 11:25:47 +0000854 file_obj = io.BytesIO()
855 file_obj.write(b"Something\nSomething\nSomething\n")
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000856
857 for headers in {}, {"Content-Length": 30}:
858 req = Request("http://example.com/", file_obj, headers)
859 if not headers:
860 # Having an iterable body without a Content-Length should
861 # raise an exception
862 self.assertRaises(ValueError, h.do_request_, req)
863 else:
864 newreq = h.do_request_(req)
865 self.assertEqual(int(newreq.get_header('Content-length')),30)
866
867 file_obj.close()
868
869 # array.array Iterable - Content Length is calculated
870
871 iterable_array = array.array("I",[1,2,3,4])
872
873 for headers in {}, {"Content-Length": 16}:
874 req = Request("http://example.com/", iterable_array, headers)
875 newreq = h.do_request_(req)
876 self.assertEqual(int(newreq.get_header('Content-length')),16)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000877
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000878 def test_http_doubleslash(self):
879 # Checks the presence of any unnecessary double slash in url does not
880 # break anything. Previously, a double slash directly after the host
Ezio Melottie130a522011-10-19 10:58:56 +0300881 # could cause incorrect parsing.
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000882 h = urllib.request.AbstractHTTPHandler()
883 o = h.parent = MockOpener()
884
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000885 data = b""
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000886 ds_urls = [
887 "http://example.com/foo/bar/baz.html",
888 "http://example.com//foo/bar/baz.html",
889 "http://example.com/foo//bar/baz.html",
890 "http://example.com/foo/bar//baz.html"
891 ]
892
893 for ds_url in ds_urls:
894 ds_req = Request(ds_url, data)
895
896 # Check whether host is determined correctly if there is no proxy
897 np_ds_req = h.do_request_(ds_req)
898 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
899
900 # Check whether host is determined correctly if there is a proxy
901 ds_req.set_proxy("someproxy:3128",None)
902 p_ds_req = h.do_request_(ds_req)
903 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
904
Senthil Kumaranc2958622010-11-22 04:48:26 +0000905 def test_fixpath_in_weirdurls(self):
906 # Issue4493: urllib2 to supply '/' when to urls where path does not
907 # start with'/'
908
909 h = urllib.request.AbstractHTTPHandler()
910 o = h.parent = MockOpener()
911
912 weird_url = 'http://www.python.org?getspam'
913 req = Request(weird_url)
914 newreq = h.do_request_(req)
915 self.assertEqual(newreq.host,'www.python.org')
916 self.assertEqual(newreq.selector,'/?getspam')
917
918 url_without_path = 'http://www.python.org'
919 req = Request(url_without_path)
920 newreq = h.do_request_(req)
921 self.assertEqual(newreq.host,'www.python.org')
922 self.assertEqual(newreq.selector,'')
923
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000924
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000925 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000926 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000927 o = h.parent = MockOpener()
928
929 url = "http://example.com/"
930 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000931 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000932 r = MockResponse(200, "OK", {}, "", url)
933 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000934 self.assertIs(r, newr)
935 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000936 r = MockResponse(202, "Accepted", {}, "", url)
937 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000938 self.assertIs(r, newr)
939 self.assertFalse(hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000940 r = MockResponse(206, "Partial content", {}, "", url)
941 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000942 self.assertIs(r, newr)
943 self.assertFalse(hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000944 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000945 r = MockResponse(502, "Bad gateway", {}, "", url)
Florent Xicluna419e3842010-08-08 16:16:07 +0000946 self.assertIsNone(h.http_response(req, r))
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000947 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000948 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000949
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000950 def test_cookies(self):
951 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000952 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000953 o = h.parent = MockOpener()
954
955 req = Request("http://example.com/")
956 r = MockResponse(200, "OK", {}, "")
957 newreq = h.http_request(req)
Florent Xicluna419e3842010-08-08 16:16:07 +0000958 self.assertIs(cj.ach_req, req)
959 self.assertIs(cj.ach_req, newreq)
960 self.assertEqual(req.get_origin_req_host(), "example.com")
961 self.assertFalse(req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000962 newr = h.http_response(req, r)
Florent Xicluna419e3842010-08-08 16:16:07 +0000963 self.assertIs(cj.ec_req, req)
964 self.assertIs(cj.ec_r, r)
965 self.assertIs(r, newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000966
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000967 def test_redirect(self):
968 from_url = "http://example.com/a.html"
969 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000970 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000971 o = h.parent = MockOpener()
972
973 # ordinary redirect behaviour
974 for code in 301, 302, 303, 307:
975 for data in None, "blah\nblah\n":
976 method = getattr(h, "http_error_%s" % code)
977 req = Request(from_url, data)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000978 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000979 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +0000980 if data is not None:
981 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000982 req.add_unredirected_header("Spam", "spam")
983 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000984 method(req, MockFile(), code, "Blah",
985 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000986 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000987 # 307 in response to POST requires user OK
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000988 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000989 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000990 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000991 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000992 except AttributeError:
Florent Xicluna419e3842010-08-08 16:16:07 +0000993 self.assertFalse(o.req.has_data())
Christian Heimes77c02eb2008-02-09 02:18:51 +0000994
995 # now it's a GET, there should not be headers regarding content
996 # (possibly dragged from before being a POST)
997 headers = [x.lower() for x in o.req.headers]
Benjamin Peterson577473f2010-01-19 00:09:57 +0000998 self.assertNotIn("content-length", headers)
999 self.assertNotIn("content-type", headers)
Christian Heimes77c02eb2008-02-09 02:18:51 +00001000
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001001 self.assertEqual(o.req.headers["Nonsense"],
1002 "viking=withhold")
Benjamin Peterson577473f2010-01-19 00:09:57 +00001003 self.assertNotIn("Spam", o.req.headers)
1004 self.assertNotIn("Spam", o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001005
1006 # loop detection
1007 req = Request(from_url)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001008 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001009 def redirect(h, req, url=to_url):
1010 h.http_error_302(req, MockFile(), 302, "Blah",
1011 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001012 # Note that the *original* request shares the same record of
1013 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001014
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001015 # detect infinite loop redirect of a URL to itself
1016 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001017 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001018 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001019 try:
1020 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001021 redirect(h, req, "http://example.com/")
1022 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001023 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001024 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001025 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001026
1027 # detect endless non-repeating chain of redirects
1028 req = Request(from_url, origin_req_host="example.com")
1029 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +00001030 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001031 try:
1032 while 1:
1033 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001034 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001035 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +00001036 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001037 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001038
guido@google.coma119df92011-03-29 11:41:02 -07001039
1040 def test_invalid_redirect(self):
1041 from_url = "http://example.com/a.html"
1042 valid_schemes = ['http','https','ftp']
1043 invalid_schemes = ['file','imap','ldap']
1044 schemeless_url = "example.com/b.html"
1045 h = urllib.request.HTTPRedirectHandler()
1046 o = h.parent = MockOpener()
1047 req = Request(from_url)
1048 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
1049
1050 for scheme in invalid_schemes:
1051 invalid_url = scheme + '://' + schemeless_url
1052 self.assertRaises(urllib.error.HTTPError, h.http_error_302,
1053 req, MockFile(), 302, "Security Loophole",
1054 MockHeaders({"location": invalid_url}))
1055
1056 for scheme in valid_schemes:
1057 valid_url = scheme + '://' + schemeless_url
1058 h.http_error_302(req, MockFile(), 302, "That's fine",
1059 MockHeaders({"location": valid_url}))
1060 self.assertEqual(o.req.get_full_url(), valid_url)
1061
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001062 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001063 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +00001064 from http.cookiejar import CookieJar
1065 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001066
1067 cj = CookieJar()
1068 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001069 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001070 hdeh = urllib.request.HTTPDefaultErrorHandler()
1071 hrh = urllib.request.HTTPRedirectHandler()
1072 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001073 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001074 o.open("http://www.example.com/")
Florent Xicluna419e3842010-08-08 16:16:07 +00001075 self.assertFalse(hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001076
Senthil Kumaran26430412011-04-13 07:01:19 +08001077 def test_redirect_fragment(self):
1078 redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
1079 hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
1080 hdeh = urllib.request.HTTPDefaultErrorHandler()
1081 hrh = urllib.request.HTTPRedirectHandler()
1082 o = build_test_opener(hh, hdeh, hrh)
1083 fp = o.open('http://www.example.com')
1084 self.assertEqual(fp.geturl(), redirected_url.strip())
1085
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001086 def test_proxy(self):
1087 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001088 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001089 o.add_handler(ph)
1090 meth_spec = [
1091 [("http_open", "return response")]
1092 ]
1093 handlers = add_ordered_mock_handlers(o, meth_spec)
1094
1095 req = Request("http://acme.example.com/")
1096 self.assertEqual(req.get_host(), "acme.example.com")
1097 r = o.open(req)
1098 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1099
1100 self.assertEqual([(handlers[0], "http_open")],
1101 [tup[0:2] for tup in o.calls])
1102
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001103 def test_proxy_no_proxy(self):
1104 os.environ['no_proxy'] = 'python.org'
1105 o = OpenerDirector()
1106 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1107 o.add_handler(ph)
1108 req = Request("http://www.perl.org/")
1109 self.assertEqual(req.get_host(), "www.perl.org")
1110 r = o.open(req)
1111 self.assertEqual(req.get_host(), "proxy.example.com")
1112 req = Request("http://www.python.org")
1113 self.assertEqual(req.get_host(), "www.python.org")
1114 r = o.open(req)
1115 self.assertEqual(req.get_host(), "www.python.org")
1116 del os.environ['no_proxy']
1117
Ronald Oussorene72e1612011-03-14 18:15:25 -04001118 def test_proxy_no_proxy_all(self):
1119 os.environ['no_proxy'] = '*'
1120 o = OpenerDirector()
1121 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1122 o.add_handler(ph)
1123 req = Request("http://www.python.org")
1124 self.assertEqual(req.get_host(), "www.python.org")
1125 r = o.open(req)
1126 self.assertEqual(req.get_host(), "www.python.org")
1127 del os.environ['no_proxy']
1128
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001129
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001130 def test_proxy_https(self):
1131 o = OpenerDirector()
1132 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
1133 o.add_handler(ph)
1134 meth_spec = [
1135 [("https_open", "return response")]
1136 ]
1137 handlers = add_ordered_mock_handlers(o, meth_spec)
1138
1139 req = Request("https://www.example.com/")
1140 self.assertEqual(req.get_host(), "www.example.com")
1141 r = o.open(req)
1142 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1143 self.assertEqual([(handlers[0], "https_open")],
1144 [tup[0:2] for tup in o.calls])
1145
Senthil Kumaran47fff872009-12-20 07:10:31 +00001146 def test_proxy_https_proxy_authorization(self):
1147 o = OpenerDirector()
1148 ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
1149 o.add_handler(ph)
1150 https_handler = MockHTTPSHandler()
1151 o.add_handler(https_handler)
1152 req = Request("https://www.example.com/")
1153 req.add_header("Proxy-Authorization","FooBar")
1154 req.add_header("User-Agent","Grail")
1155 self.assertEqual(req.get_host(), "www.example.com")
1156 self.assertIsNone(req._tunnel_host)
1157 r = o.open(req)
1158 # Verify Proxy-Authorization gets tunneled to request.
1159 # httpsconn req_headers do not have the Proxy-Authorization header but
1160 # the req will have.
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001161 self.assertNotIn(("Proxy-Authorization","FooBar"),
Senthil Kumaran47fff872009-12-20 07:10:31 +00001162 https_handler.httpconn.req_headers)
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001163 self.assertIn(("User-Agent","Grail"),
1164 https_handler.httpconn.req_headers)
Senthil Kumaran47fff872009-12-20 07:10:31 +00001165 self.assertIsNotNone(req._tunnel_host)
1166 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1167 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001168
Ronald Oussorene72e1612011-03-14 18:15:25 -04001169 def test_osx_proxy_bypass(self):
1170 bypass = {
1171 'exclude_simple': False,
1172 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10',
1173 '10.0/16']
1174 }
1175 # Check hosts that should trigger the proxy bypass
1176 for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1',
1177 '10.0.0.1'):
1178 self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass),
1179 'expected bypass of %s to be True' % host)
1180 # Check hosts that should not trigger the proxy bypass
1181 for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'):
1182 self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass),
1183 'expected bypass of %s to be False' % host)
1184
1185 # Check the exclude_simple flag
1186 bypass = {'exclude_simple': True, 'exceptions': []}
1187 self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
1188
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001189 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001190 opener = OpenerDirector()
1191 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001192 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001193 realm = "ACME Widget Store"
1194 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001195 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1196 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001197 opener.add_handler(auth_handler)
1198 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001199 self._test_basic_auth(opener, auth_handler, "Authorization",
1200 realm, http_handler, password_manager,
1201 "http://acme.example.com/protected",
1202 "http://acme.example.com/protected",
1203 )
1204
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001205 def test_basic_auth_with_single_quoted_realm(self):
1206 self.test_basic_auth(quote_char="'")
1207
Thomas Wouters477c8d52006-05-27 19:21:47 +00001208 def test_proxy_basic_auth(self):
1209 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001210 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001211 opener.add_handler(ph)
1212 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001213 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001214 realm = "ACME Networks"
1215 http_handler = MockHTTPHandler(
1216 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001217 opener.add_handler(auth_handler)
1218 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001220 realm, http_handler, password_manager,
1221 "http://acme.example.com:3128/protected",
1222 "proxy.example.com:3128",
1223 )
1224
1225 def test_basic_and_digest_auth_handlers(self):
1226 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1227 # response (http://python.org/sf/1479302), where it should instead
1228 # return None to allow another handler (especially
1229 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001230
1231 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1232 # try digest first (since it's the strongest auth scheme), so we record
1233 # order of calls here to check digest comes first:
1234 class RecordingOpenerDirector(OpenerDirector):
1235 def __init__(self):
1236 OpenerDirector.__init__(self)
1237 self.recorded = []
1238 def record(self, info):
1239 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001240 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001241 def http_error_401(self, *args, **kwds):
1242 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001243 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001244 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001245 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001246 def http_error_401(self, *args, **kwds):
1247 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001248 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001249 *args, **kwds)
1250
1251 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001252 password_manager = MockPasswordManager()
1253 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001254 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001255 realm = "ACME Networks"
1256 http_handler = MockHTTPHandler(
1257 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001258 opener.add_handler(basic_handler)
1259 opener.add_handler(digest_handler)
1260 opener.add_handler(http_handler)
1261
1262 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001263 self._test_basic_auth(opener, basic_handler, "Authorization",
1264 realm, http_handler, password_manager,
1265 "http://acme.example.com/protected",
1266 "http://acme.example.com/protected",
1267 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001268 # check digest was tried before basic (twice, because
1269 # _test_basic_auth called .open() twice)
1270 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001271
1272 def _test_basic_auth(self, opener, auth_handler, auth_header,
1273 realm, http_handler, password_manager,
1274 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001275 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001276 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277
1278 # .add_password() fed through to password manager
1279 auth_handler.add_password(realm, request_url, user, password)
1280 self.assertEqual(realm, password_manager.realm)
1281 self.assertEqual(request_url, password_manager.url)
1282 self.assertEqual(user, password_manager.user)
1283 self.assertEqual(password, password_manager.password)
1284
1285 r = opener.open(request_url)
1286
1287 # should have asked the password manager for the username/password
1288 self.assertEqual(password_manager.target_realm, realm)
1289 self.assertEqual(password_manager.target_url, protected_url)
1290
1291 # expect one request without authorization, then one with
1292 self.assertEqual(len(http_handler.requests), 2)
1293 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001294 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001295 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001296 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001297 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1298 auth_hdr_value)
Senthil Kumaranca2fc9e2010-02-24 16:53:16 +00001299 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1300 auth_hdr_value)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001301 # if the password manager can't find a password, the handler won't
1302 # handle the HTTP auth error
1303 password_manager.user = password_manager.password = None
1304 http_handler.reset()
1305 r = opener.open(request_url)
1306 self.assertEqual(len(http_handler.requests), 1)
1307 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1308
class MiscTests(unittest.TestCase):

    def test_build_opener(self):
        # build_opener() accepts handler classes and instances and installs
        # each of them in the opener it returns.
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        make_opener = urllib.request.build_opener

        opener = make_opener(FooHandler, BarHandler)
        for expected in (FooHandler, BarHandler):
            self.opener_has_handler(opener, expected)

        # can take a mix of classes and instances
        opener = make_opener(FooHandler, BarHandler())
        for expected in (FooHandler, BarHandler):
            self.opener_has_handler(opener, expected)

        # subclasses of default handlers override default handlers
        opener = make_opener(MyHTTPHandler)
        self.opener_has_handler(opener, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        opener = make_opener()
        self.opener_has_handler(opener, urllib.request.HTTPHandler)
        opener = make_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(opener, urllib.request.HTTPHandler)
        opener = make_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(opener, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass

        opener = make_opener(MyHTTPHandler, MyOtherHTTPHandler)
        for expected in (MyHTTPHandler, MyOtherHTTPHandler):
            self.opener_has_handler(opener, expected)

    def opener_has_handler(self, opener, handler_class):
        # Passes when some installed handler's class equals handler_class;
        # instances of subclasses do not count.
        installed = [handler.__class__ for handler in opener.handlers]
        self.assertTrue(handler_class in installed)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001351
class RequestTests(unittest.TestCase):
    """Checks on the state a ``Request`` derives from its URL and payload.

    Request state is read through the public attributes ``data``, ``type``,
    ``host``, ``selector`` and ``origin_req_host``.  The accessor methods
    these tests used before (``has_data``, ``add_data``, ``get_type``,
    ``get_host``, ``get_selector``, ``get_origin_req_host``) were deprecated
    in Python 3.3 and removed in 3.4, so calling them raises AttributeError
    on current interpreters.
    """

    def setUp(self):
        # One request without a payload and one with a payload plus a header.
        self.get = Request("http://www.python.org/~jeremy/")
        self.post = Request("http://www.python.org/~jeremy/",
                            "data",
                            headers={"X-Test": "test"})

    def test_method(self):
        # The HTTP method follows from whether a payload is attached.
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Attaching a payload to a payload-less request turns it into a POST.
        self.assertIsNone(self.get.data)
        self.assertEqual("GET", self.get.get_method())
        self.get.data = "spam"
        self.assertEqual("spam", self.get.data)
        self.assertEqual("POST", self.get.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.selector)
        req = Request("http://www.python.org/")
        self.assertEqual("/", req.selector)

    def test_get_type(self):
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        # Percent-escapes in the host part are decoded ("%70" is "p").
        req = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", req.host)

    def test_proxy(self):
        self.assertFalse(self.get.has_proxy())
        self.get.set_proxy("www.perl.org", "http")
        self.assertTrue(self.get.has_proxy())
        # The origin host still names the original target, while the host
        # to contact is now the proxy.
        self.assertEqual("www.python.org", self.get.origin_req_host)
        self.assertEqual("www.perl.org", self.get.host)

    def test_wrapped_url(self):
        # The "<URL:...>" wrapper is stripped before the URL is parsed.
        req = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", req.host)

    def test_url_fragment(self):
        # The fragment never becomes part of the selector.
        req = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", req.selector)
        req = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", req.selector)

        # Issue 11703: geturl() omits fragment in the original URL.
        url = 'http://docs.python.org/library/urllib2.html#OK'
        req = Request(url)
        self.assertEqual(req.get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001411
def test_main(verbose=None):
    """Run the doctests first, then every unittest class of this module.

    *verbose* is handed through unchanged to ``support.run_doctest``.
    """
    # Imported locally: the module passes itself to the doctest runner.
    from test import test_urllib2

    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)

    support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )


if __name__ == "__main__":
    test_main(verbose=True)