blob: f77537d61ecc67d08b5243213b44ad8046c45b4e [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    # Smoke tests for urllib.request.urlopen() and
    # urllib.request.parse_http_list().

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        # On Windows the absolute path does not begin with a slash, so a
        # third slash is supplied explicitly when building the URL.
        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        try:
            # Only checks that reading works; the content is not inspected.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        # Each case pairs a raw string with the list of elements that
        # parse_http_list() is expected to produce for it.
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000050
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000051
def test_request_headers_dict():
    """
    Doctest holder: header-name normalization in the Request.headers dict.

    The Request.headers dictionary is not a documented interface. It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers. It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
81
def test_request_headers_methods():
    """
    Doctest holder: the has_header / get_header / header_items accessors.

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility. (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
112
113
def test_password_manager(self):
    """
    Doctest holder: HTTPPasswordMgr lookups by realm, URI, path and port.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
177
178
def test_password_manager_default_port(self):
    """
    Doctest holder: HTTPPasswordMgr handling of explicit and default ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme. This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
225
class MockOpener:
    """Opener stand-in that stores the arguments of its most recent call."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the arguments so the calling test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Keep the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
232
class MockFile:
    """File-like stub whose methods all do nothing and return None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
237
class MockHeaders(dict):
    """Dict of headers whose getheaders() ignores the requested name."""

    def getheaders(self, name):
        # Every stored value is returned, whatever name was asked for.
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000241
class MockResponse(io.StringIO):
    """In-memory text response carrying a code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        # The headers object is handed back exactly as it was supplied.
        return self.headers

    def geturl(self):
        return self.url
250
class MockCookieJar:
    """Cookie-jar stub that records the objects passed to it."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
256
class FakeMethod:
    """Callable that forwards to a handler together with a name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # Prefix the caller's positional arguments with the stored name and
        # action before delegating.
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
264
class MockHTTPResponse(io.IOBase):
    """Canned HTTP response whose body and headers are always empty."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason
        # The code is fixed at 200 regardless of the status passed in.
        self.code = 200

    def read(self):
        return ''

    def info(self):
        return {}

    def geturl(self):
        # This class never assigns .url itself; it must have been set on the
        # instance from outside before geturl() is called.
        return self.url
281
282
class MockHTTPClass:
    """Fake HTTP connection that records how it was driven.

    Calling the instance stores the host and timeout and returns the same
    object, so one instance can be passed where a connection class is
    expected.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the current mapping in place.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Sorted so tests can compare against a fixed ordering.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            import socket
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
320
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        # The fake methods are attached to the class, not to the instance.
        for spec in methods:
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            setattr(type(self), name, FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Every call is logged on the parent before the action is performed.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The code is the text after the last space; it stays a string
            # when it is not a valid integer.
            code = action.rpartition(" ")[2]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib.error.URLError("blah")
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        # Attaching a handler resets the parent's call log.
        self.parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
364
def add_ordered_mock_handlers(opener, meth_spec):
    """Build one MockHandler per entry of meth_spec and register each of them.

    meth_spec is a list of lists; each inner list names the methods to define
    on one handler. An entry given as a plain string, eg:

    [["http_error", "ftp_open"], ["http_open"]]

    creates .http_error() and .ftp_open() on the first handler and
    .http_open() on the second; such methods only record their arguments
    and return None. An entry given as a (name, action) tuple makes the
    method carry out that action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    creates .http_error() on one handler (returning None) and .http_open()
    on another handler, which returns a Request object.

    Returns the list of handlers, in the order they were created.

    """
    handlers = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler attaches its
        # methods to the class.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later entries get a higher handler_order than earlier ones.
        handler.handler_order += position
        handler.add_parent(opener)
        handlers.append(handler)
        opener.add_handler(handler)
    return handlers
395
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with each given handler added, in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
401
class MockHTTPHandler(urllib.request.BaseHandler):
    # useful for testing redirections and auth
    # sends supplied headers and code as first response
    # sends 200 OK as second response

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        self.requests = []
        self._count = 0

    def http_open(self, req):
        import copy
        import email
        import http.client

        # Store a deep copy of the request object.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Any call after the first gets a plain 200 OK response.
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
        # First call: report the configured code and headers via the parent.
        self._count += 1
        name = http.client.responses[self.code]
        msg = email.message_from_string(self.headers)
        return self.parent.error("http", req, MockFile(), self.code, name, msg)
427
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpcon

    def __init__(self):
        super().__init__()
        # One mock connection is kept on the handler so tests can inspect it
        # after a request has been opened.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
438
class MockPasswordManager:
    """Password manager stub holding a single set of credentials."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was looked up, then return the stored credentials
        # whether or not the realm and URI match what was added.
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
449
450
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000451class OpenerDirectorTests(unittest.TestCase):
452
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000453 def test_add_non_handler(self):
454 class NonHandler(object):
455 pass
456 self.assertRaises(TypeError,
457 OpenerDirector().add_handler, NonHandler())
458
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000459 def test_badly_named_methods(self):
460 # test work-around for three methods that accidentally follow the
461 # naming conventions for handler methods
462 # (*_open() / *_request() / *_response())
463
464 # These used to call the accidentally-named methods, causing a
465 # TypeError in real code; here, returning self from these mock
466 # methods would either cause no exception, or AttributeError.
467
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000468 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000469
470 o = OpenerDirector()
471 meth_spec = [
472 [("do_open", "return self"), ("proxy_open", "return self")],
473 [("redirect_request", "return self")],
474 ]
475 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000476 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000477 for scheme in "do", "proxy", "redirect":
478 self.assertRaises(URLError, o.open, scheme+"://example.com/")
479
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000480 def test_handled(self):
481 # handler returning non-None means no more handlers will be called
482 o = OpenerDirector()
483 meth_spec = [
484 ["http_open", "ftp_open", "http_error_302"],
485 ["ftp_open"],
486 [("http_open", "return self")],
487 [("http_open", "return self")],
488 ]
489 handlers = add_ordered_mock_handlers(o, meth_spec)
490
491 req = Request("http://example.com/")
492 r = o.open(req)
493 # Second .http_open() gets called, third doesn't, since second returned
494 # non-None. Handlers without .http_open() never get any methods called
495 # on them.
496 # In fact, second mock handler defining .http_open() returns self
497 # (instead of response), which becomes the OpenerDirector's return
498 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000499 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000500 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
501 for expected, got in zip(calls, o.calls):
502 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000503 self.assertEqual((handler, name), expected)
504 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000505
506 def test_handler_order(self):
507 o = OpenerDirector()
508 handlers = []
509 for meths, handler_order in [
510 ([("http_open", "return self")], 500),
511 (["http_open"], 0),
512 ]:
513 class MockHandlerSubclass(MockHandler): pass
514 h = MockHandlerSubclass(meths)
515 h.handler_order = handler_order
516 handlers.append(h)
517 o.add_handler(h)
518
519 r = o.open("http://example.com/")
520 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000521 self.assertEqual(o.calls[0][0], handlers[1])
522 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000523
524 def test_raise(self):
525 # raising URLError stops processing of request
526 o = OpenerDirector()
527 meth_spec = [
528 [("http_open", "raise")],
529 [("http_open", "return self")],
530 ]
531 handlers = add_ordered_mock_handlers(o, meth_spec)
532
533 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000534 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000535 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000536
537## def test_error(self):
538## # XXX this doesn't actually seem to be used in standard library,
539## # but should really be tested anyway...
540
541 def test_http_error(self):
542 # XXX http_error_default
543 # http errors are a special case
544 o = OpenerDirector()
545 meth_spec = [
546 [("http_open", "error 302")],
547 [("http_error_400", "raise"), "http_open"],
548 [("http_error_302", "return response"), "http_error_303",
549 "http_error"],
550 [("http_error_302")],
551 ]
552 handlers = add_ordered_mock_handlers(o, meth_spec)
553
554 class Unknown:
555 def __eq__(self, other): return True
556
557 req = Request("http://example.com/")
558 r = o.open(req)
559 assert len(o.calls) == 2
560 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000561 (handlers[2], "http_error_302",
562 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000563 for expected, got in zip(calls, o.calls):
564 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000565 self.assertEqual((handler, method_name), got[:2])
566 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000567
568 def test_processors(self):
569 # *_request / *_response methods get called appropriately
570 o = OpenerDirector()
571 meth_spec = [
572 [("http_request", "return request"),
573 ("http_response", "return response")],
574 [("http_request", "return request"),
575 ("http_response", "return response")],
576 ]
577 handlers = add_ordered_mock_handlers(o, meth_spec)
578
579 req = Request("http://example.com/")
580 r = o.open(req)
581 # processor methods are called on *all* handlers that define them,
582 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000583 calls = [
584 (handlers[0], "http_request"), (handlers[1], "http_request"),
585 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000586
587 for i, (handler, name, args, kwds) in enumerate(o.calls):
588 if i < 2:
589 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000590 self.assertEqual((handler, name), calls[i])
591 self.assertEqual(len(args), 1)
Ezio Melottie9615932010-01-24 19:26:24 +0000592 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000593 else:
594 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000595 self.assertEqual((handler, name), calls[i])
596 self.assertEqual(len(args), 2)
Ezio Melottie9615932010-01-24 19:26:24 +0000597 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000598 # response from opener.open is None, because there's no
599 # handler that defines http_open to handle it
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000600 self.assertTrue(args[1] is None or
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000601 isinstance(args[1], MockResponse))
602
603
def sanepathname2url(path):
    """Return pathname2url(path), minus two leading slashes on Windows.

    The trimming applies only when os.name is "nt" and the converted path
    starts with "///".
    """
    converted = urllib.request.pathname2url(path)
    needs_trim = os.name == "nt" and converted.startswith("///")
    # XXX don't ask me about the mac...
    return converted[2:] if needs_trim else converted
610
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000611class HandlerTests(unittest.TestCase):
612
613 def test_ftp(self):
614 class MockFTPWrapper:
615 def __init__(self, data): self.data = data
616 def retrfile(self, filename, filetype):
617 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000618 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000619
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000620 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000621 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000622 def connect_ftp(self, user, passwd, host, port, dirs,
623 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000624 self.user, self.passwd = user, passwd
625 self.host, self.port = host, port
626 self.dirs = dirs
627 self.ftpwrapper = MockFTPWrapper(self.data)
628 return self.ftpwrapper
629
Georg Brandlf78e02b2008-06-10 17:40:04 +0000630 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000631 data = "rheum rhaponicum"
632 h = NullFTPHandler(data)
633 o = h.parent = MockOpener()
634
635 for url, host, port, type_, dirs, filename, mimetype in [
636 ("ftp://localhost/foo/bar/baz.html",
637 "localhost", ftplib.FTP_PORT, "I",
638 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000639 ("ftp://localhost:80/foo/bar/",
640 "localhost", 80, "D",
641 ["foo", "bar"], "", None),
642 ("ftp://localhost/baz.gif;type=a",
643 "localhost", ftplib.FTP_PORT, "A",
644 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000645 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000646 req = Request(url)
647 req.timeout = None
648 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000649 # ftp authentication not yet implemented by FTPHandler
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000650 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000651 self.assertEqual(h.host, socket.gethostbyname(host))
652 self.assertEqual(h.port, port)
653 self.assertEqual(h.dirs, dirs)
654 self.assertEqual(h.ftpwrapper.filename, filename)
655 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000656 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000657 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000658 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000659
660 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000661 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000662 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000663 o = h.parent = MockOpener()
664
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000665 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000666 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000667 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000668 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000669 "file://localhost%s" % urlpath,
670 "file://%s" % urlpath,
671 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000672 ]
673 try:
674 localaddr = socket.gethostbyname(socket.gethostname())
675 except socket.gaierror:
676 localaddr = ''
677 if localaddr:
678 urls.append("file://%s%s" % (localaddr, urlpath))
679
680 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000681 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000682 try:
683 try:
684 f.write(towrite)
685 finally:
686 f.close()
687
688 r = h.file_open(Request(url))
689 try:
690 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000691 headers = r.info()
692 newurl = r.geturl()
693 finally:
694 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000695 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000696 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000697 finally:
698 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000699 self.assertEqual(data, towrite)
700 self.assertEqual(headers["Content-type"], "text/plain")
701 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000702 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000703
704 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000705 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000706 "file:///file_does_not_exist.txt",
707 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
708 os.getcwd(), TESTFN),
709 "file://somerandomhost.ontheinternet.com%s/%s" %
710 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000711 ]:
712 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000713 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000714 try:
715 f.write(towrite)
716 finally:
717 f.close()
718
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000719 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000720 h.file_open, Request(url))
721 finally:
722 os.remove(TESTFN)
723
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000724 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000725 o = h.parent = MockOpener()
726 # XXXX why does // mean ftp (and /// mean not ftp!), and where
727 # is file: scheme specified? I think this is really a bug, and
728 # what was intended was to distinguish between URLs like:
729 # file:/blah.txt (a file)
730 # file://localhost/blah.txt (a file)
731 # file:///blah.txt (a file)
732 # file://ftp.example.com/blah.txt (an ftp URL)
733 for url, ftp in [
734 ("file://ftp.example.com//foo.txt", True),
735 ("file://ftp.example.com///foo.txt", False),
736# XXXX bug: fails with OSError, should be URLError
737 ("file://ftp.example.com/foo.txt", False),
738 ]:
739 req = Request(url)
740 try:
741 h.file_open(req)
742 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000743 except (urllib.error.URLError, OSError):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000744 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000745 else:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000746 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000747 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000748
def test_http(self):
    """Exercise AbstractHTTPHandler.do_open() and do_request_() with mocks.

    Covers three things:
    * do_open() returns a response object exposing the expected attributes
      and forwards host, method, selector, headers and data to the
      connection object;
    * an error raised while sending is reported as URLError;
    * do_request_() adds the standard headers without clobbering headers
      that are already present on the request.
    """
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", "blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes (bare attribute access: existence check only)
        r.read; r.readline  # wrapped MockFile methods
        r.info; r.geturl  # addinfourl methods
        # The original line here was ``r.code, r.msg == 200, "OK"``, which
        # only built a throw-away tuple and asserted nothing.
        self.assertEqual(r.code, 200)
        self.assertEqual(r.msg, "OK")
        r.info().get; r.info().__contains__  # r.info() gives dict-like headers
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check socket.error converted to URLError
    # (re-uses the connection mock and request of the last iteration)
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in "", None:  # POST, GET
        req = Request("http://example.com/", data)
        h.do_request_(req)
        if data is None:  # GET
            self.assertNotIn("Content-length", req.unredirected_hdrs)
            self.assertNotIn("Content-type", req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000810
def test_http_doubleslash(self):
    """Check that a redundant double slash in a URL does not break the Host header.

    Previously, a double slash directly after the host could cause
    incorrect parsing.  Each URL is checked both without and with a proxy
    configured on the request.
    """
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    for ds_url in (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    ):
        request = Request(ds_url, body)

        # Host must be determined correctly when there is no proxy ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and also once the request has been pointed at a proxy.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
837
838
def test_errors(self):
    """HTTPErrorProcessor passes 2xx responses through and routes others to error()."""
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # all 2xx are passed through
    for status, reason in ((200, "OK"),
                           (202, "Accepted"),
                           (206, "Partial content")):
        response = MockResponse(status, reason, {}, "", url)
        self.assertIs(processor.http_response(req, response), response)
        # opener.error() must not have been called
        self.assertFalse(hasattr(opener, "proto"))

    # anything else calls opener.error (and MockOpener returns None, here)
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(req, response))
    self.assertEqual(opener.proto, "http")  # opener.error called
    self.assertEqual(opener.args, (req, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000863
def test_cookies(self):
    """HTTPCookieProcessor hands requests and responses to its cookie jar.

    The mock jar records the request given to it when cookies are added
    (``ach_req``) and the request/response pair given to it when cookies
    are extracted (``ec_req`` / ``ec_r``); the processor must return the
    very same request and response objects it was handed.
    """
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")
    newreq = h.http_request(req)
    self.assertTrue(cj.ach_req is req is newreq)
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())
    newr = h.http_response(req, r)
    self.assertTrue(cj.ec_req is req)
    self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000878
def test_redirect(self):
    """Check HTTPRedirectHandler's ordinary behaviour and its loop detection."""
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in (301, 302, 303, 307):
        handle = getattr(h, "http_error_%s" % code)
        for data in (None, "blah\nblah\n"):
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                handle(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)

            redirected = o.req
            self.assertEqual(redirected.get_full_url(), to_url)
            try:
                self.assertEqual(redirected.get_method(), "GET")
            except AttributeError:
                self.assertTrue(not redirected.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            header_names = [name.lower() for name in redirected.headers]
            self.assertNotIn("content-length", header_names)
            self.assertNotIn("content-type", header_names)

            self.assertEqual(redirected.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", redirected.headers)
            self.assertNotIn("Spam", redirected.unredirected_hdrs)

    # loop detection
    def redirects_until_error(next_url):
        # Keep redirecting one request until the handler raises HTTPError and
        # report how many redirects succeeded first.  Note that the
        # *original* request shares the same record of redirections with the
        # sub-requests caused by the redirections.
        req = Request(from_url, origin_req_host="example.com")
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        hops = 0
        try:
            while True:
                h.http_error_302(req, MockFile(), 302, "Blah",
                                 MockHeaders({"location": next_url(hops)}))
                hops += 1
        except urllib.error.HTTPError:
            return hops

    # detect infinite loop redirect of a URL to itself
    # (don't stop until max_repeats, because cookies may introduce state)
    self.assertEqual(
        redirects_until_error(lambda hops: "http://example.com/"),
        urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    self.assertEqual(
        redirects_until_error(lambda hops: "http://example.com/%d" % hops),
        urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000950
def test_cookie_redirect(self):
    """Cookies set for one host must not leak into a redirected request."""
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock HTTP handler answers with a 302 pointing at another host.
    redirecting = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
    default_errors = urllib.request.HTTPDefaultErrorHandler()
    redirects = urllib.request.HTTPRedirectHandler()
    cookies = urllib.request.HTTPCookieProcessor(jar)

    opener = build_test_opener(redirecting, default_errors, redirects, cookies)
    opener.open("http://www.example.com/")
    self.assertFalse(redirecting.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000965
def test_proxy(self):
    """ProxyHandler rewrites the request host to the configured HTTP proxy."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    req = Request("http://acme.example.com/")
    self.assertEqual(req.get_host(), "acme.example.com")
    opener.open(req)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    # Only the mock handler's http_open should have been recorded.
    recorded = [entry[0:2] for entry in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
982
Senthil Kumaran7bb04972009-10-11 04:58:55 +0000983 def test_proxy_no_proxy(self):
984 os.environ['no_proxy'] = 'python.org'
985 o = OpenerDirector()
986 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
987 o.add_handler(ph)
988 req = Request("http://www.perl.org/")
989 self.assertEqual(req.get_host(), "www.perl.org")
990 r = o.open(req)
991 self.assertEqual(req.get_host(), "proxy.example.com")
992 req = Request("http://www.python.org")
993 self.assertEqual(req.get_host(), "www.python.org")
994 r = o.open(req)
995 self.assertEqual(req.get_host(), "www.python.org")
996 del os.environ['no_proxy']
997
998
def test_proxy_https(self):
    """ProxyHandler rewrites the request host to the configured HTTPS proxy."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler(dict(https="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    req = Request("https://www.example.com/")
    self.assertEqual(req.get_host(), "www.example.com")
    opener.open(req)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    # Only the mock handler's https_open should have been recorded.
    recorded = [entry[0:2] for entry in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1014
def test_proxy_https_proxy_authorization(self):
    """Proxy-Authorization stays on the request but is not sent to the origin.

    After opening an HTTPS URL through a proxy, the headers recorded on
    the mock connection must not contain Proxy-Authorization, while the
    request object itself still carries it.
    """
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    req = Request("https://www.example.com/")
    for name, value in (("Proxy-Authorization", "FooBar"),
                        ("User-Agent", "Grail")):
        req.add_header(name, value)
    self.assertEqual(req.get_host(), "www.example.com")
    self.assertIsNone(req._tunnel_host)

    opener.open(req)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertNotIn(("Proxy-Authorization", "FooBar"), sent_headers)
    self.assertIn(("User-Agent", "Grail"), sent_headers)
    self.assertIsNotNone(req._tunnel_host)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")
    self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001037
def test_basic_auth(self, quote_char='"'):
    """Run the shared basic-auth scenario against a 401 challenge.

    *quote_char* is the character placed around the realm in the
    WWW-Authenticate header.
    """
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
1053
def test_basic_auth_with_single_quoted_realm(self):
    """Repeat the basic-auth test with the realm wrapped in single quotes."""
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1056
def test_proxy_basic_auth(self):
    """Run the shared basic-auth scenario against a 407 proxy challenge."""
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1073
def test_basic_and_digest_auth_handlers(self):
    """Digest auth is tried before basic auth and does not block it.

    HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    response (http://python.org/sf/1479302), where it should instead
    return None to allow another handler (especially
    HTTPBasicAuthHandler) to handle the response.

    Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    try digest first (since it's the strongest auth scheme), so the order
    of calls is recorded here to check digest comes first.
    """
    class RecordingOpenerDirector(OpenerDirector):
        # Opener that remembers which auth handlers were consulted.
        def __init__(self):
            super().__init__()
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            # The base-class result is deliberately not returned.
            super().http_error_401(*args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            # The base-class result is deliberately not returned.
            super().http_error_401(*args, **kwds)

    realm = "ACME Networks"
    challenge = 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    # Basic is registered first on purpose: digest must still be tried first.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001120
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    """Shared scenario for the basic-auth tests.

    Registers a password through *auth_handler*, opens *request_url* with
    *opener* and checks that the second recorded request carries the
    expected *auth_header*; then checks that no retry happens when the
    password manager has no credentials.
    """
    import base64
    user = "wile"
    password = "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    for expected, attribute in ((realm, "realm"),
                                (request_url, "url"),
                                (user, "user"),
                                (password, "password")):
        self.assertEqual(expected, getattr(password_manager, attribute))

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    first, second = http_handler.requests[0:2]
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(first.has_header(auth_header))
    credentials = ('%s:%s' % (user, password)).encode("ascii")
    encoded = base64.encodebytes(credentials).strip().decode()
    self.assertEqual(second.get_header(auth_header), 'Basic ' + encoded)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1156
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001157
class MiscTests(unittest.TestCase):
    """Tests for urllib.request.build_opener()."""

    def test_build_opener(self):
        """build_opener() accepts handler classes and instances, in any mix."""
        class MyHTTPHandler(urllib.request.HTTPHandler): pass
        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self): pass
        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self): pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Fail unless *opener* holds a handler whose class is exactly *handler_class*.

        The comparison is on the exact class (not isinstance), so a
        subclass instance does not satisfy a check for its base class.
        """
        if not any(h.__class__ == handler_class for h in opener.handlers):
            # Name the missing class instead of the opaque "False is not true".
            self.fail("opener has no handler of class %r" % (handler_class,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001203
class RequestTests(unittest.TestCase):
    """Tests for the accessor methods of urllib.request.Request."""

    URL = "http://www.python.org/~jeremy/"

    def setUp(self):
        # One request without data and one with data plus an extra header.
        self.get = Request(self.URL)
        self.post = Request(self.URL, "data", headers={"X-Test": "test"})

    def test_method(self):
        """A request with data is a POST; one without is a GET."""
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        """Adding data to a request turns it from a GET into a POST."""
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root = Request("http://www.python.org/")
        self.assertEqual("/", root.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        """Percent-escapes in the host part are decoded."""
        escaped = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        """set_proxy() changes the host but keeps the origin request host."""
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001249
def test_main(verbose=None):
    """Run the doctests of this module and urllib.request, then the unit tests."""
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(TrivialTests,
                         OpenerDirectorTests,
                         HandlerTests,
                         MiscTests,
                         RequestTests)


if __name__ == "__main__":
    test_main(verbose=True)