blob: 080daa4f2733a51a7c5f7e3ca519f425f25559f4 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016class TrivialTests(unittest.TestCase):
17 def test_trivial(self):
18 # A couple trivial tests
Guido van Rossume2ae77b2001-10-24 20:42:55 +000019
Jeremy Hylton1afc1692008-06-18 20:49:58 +000020 self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
Tim Peters861adac2001-07-16 20:49:49 +000021
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000022 # XXX Name hacking to get this to work on Windows.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000023 fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
Senthil Kumarand587e302010-01-10 17:45:52 +000024
Senthil Kumarand587e302010-01-10 17:45:52 +000025 if os.name == 'nt':
26 file_url = "file:///%s" % fname
27 else:
28 file_url = "file://%s" % fname
29
Jeremy Hylton1afc1692008-06-18 20:49:58 +000030 f = urllib.request.urlopen(file_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000031
32 buf = f.read()
33 f.close()
Tim Petersf5f32b42005-07-17 23:16:17 +000034
Georg Brandle1b13d22005-08-24 22:20:32 +000035 def test_parse_http_list(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000036 tests = [
37 ('a,b,c', ['a', 'b', 'c']),
38 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
39 ('a, b, "c", "d", "e,f", g, h',
40 ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
41 ('a="b\\"c", d="e\\,f", g="h\\\\i"',
42 ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
Georg Brandle1b13d22005-08-24 22:20:32 +000043 for string, list in tests:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000044 self.assertEquals(urllib.request.parse_http_list(string), list)
Georg Brandle1b13d22005-08-24 22:20:32 +000045
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000046
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000047def test_request_headers_dict():
48 """
49 The Request.headers dictionary is not a documented interface. It should
50 stay that way, because the complete set of headers are only accessible
51 through the .get_header(), .has_header(), .header_items() interface.
52 However, .headers pre-dates those methods, and so real code will be using
53 the dictionary.
54
55 The introduction in 2.4 of those methods was a mistake for the same reason:
56 code that previously saw all (urllib2 user)-provided headers in .headers
57 now sees only a subset (and the function interface is ugly and incomplete).
58 A better change would have been to replace .headers dict with a dict
59 subclass (or UserDict.DictMixin instance?) that preserved the .headers
60 interface and also provided access to the "unredirected" headers. It's
61 probably too late to fix that, though.
62
63
64 Check .capitalize() case normalization:
65
66 >>> url = "http://example.com"
67 >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
68 'blah'
69 >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
70 'blah'
71
72 Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
73 but that could be changed in future.
74
75 """
76
77def test_request_headers_methods():
78 """
79 Note the case normalization of header names here, to .capitalize()-case.
80 This should be preserved for backwards-compatibility. (In the HTTP case,
81 normalization to .title()-case is done by urllib2 before sending headers to
Georg Brandl24420152008-05-26 16:32:26 +000082 http.client).
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000083
84 >>> url = "http://example.com"
85 >>> r = Request(url, headers={"Spam-eggs": "blah"})
86 >>> r.has_header("Spam-eggs")
87 True
88 >>> r.header_items()
89 [('Spam-eggs', 'blah')]
90 >>> r.add_header("Foo-Bar", "baz")
Guido van Rossumcc2b0162007-02-11 06:12:03 +000091 >>> items = sorted(r.header_items())
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000092 >>> items
93 [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
94
95 Note that e.g. r.has_header("spam-EggS") is currently False, and
96 r.get_header("spam-EggS") returns None, but that could be changed in
97 future.
98
99 >>> r.has_header("Not-there")
100 False
Guido van Rossum7131f842007-02-09 20:13:25 +0000101 >>> print(r.get_header("Not-there"))
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000102 None
103 >>> r.get_header("Not-there", "default")
104 'default'
105
106 """
107
108
Thomas Wouters477c8d52006-05-27 19:21:47 +0000109def test_password_manager(self):
110 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000111 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters477c8d52006-05-27 19:21:47 +0000112 >>> add = mgr.add_password
113 >>> add("Some Realm", "http://example.com/", "joe", "password")
114 >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
115 >>> add("c", "http://example.com/foo", "foo", "ni")
116 >>> add("c", "http://example.com/bar", "bar", "nini")
117 >>> add("b", "http://example.com/", "first", "blah")
118 >>> add("b", "http://example.com/", "second", "spam")
119 >>> add("a", "http://example.com", "1", "a")
120 >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
121 >>> add("Some Realm", "d.example.com", "4", "d")
122 >>> add("Some Realm", "e.example.com:3128", "5", "e")
123
124 >>> mgr.find_user_password("Some Realm", "example.com")
125 ('joe', 'password')
126 >>> mgr.find_user_password("Some Realm", "http://example.com")
127 ('joe', 'password')
128 >>> mgr.find_user_password("Some Realm", "http://example.com/")
129 ('joe', 'password')
130 >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
131 ('joe', 'password')
132 >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
133 ('joe', 'password')
134 >>> mgr.find_user_password("c", "http://example.com/foo")
135 ('foo', 'ni')
136 >>> mgr.find_user_password("c", "http://example.com/bar")
137 ('bar', 'nini')
138
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Actually, this is really undefined ATM
140## Currently, we use the highest-level path where more than one match:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000141
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000142## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
143## ('joe', 'password')
Thomas Wouters477c8d52006-05-27 19:21:47 +0000144
145 Use latest add_password() in case of conflict:
146
147 >>> mgr.find_user_password("b", "http://example.com/")
148 ('second', 'spam')
149
150 No special relationship between a.example.com and example.com:
151
152 >>> mgr.find_user_password("a", "http://example.com/")
153 ('1', 'a')
154 >>> mgr.find_user_password("a", "http://a.example.com/")
155 (None, None)
156
157 Ports:
158
159 >>> mgr.find_user_password("Some Realm", "c.example.com")
160 (None, None)
161 >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
162 ('3', 'c')
163 >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
164 ('3', 'c')
165 >>> mgr.find_user_password("Some Realm", "d.example.com")
166 ('4', 'd')
167 >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
168 ('5', 'e')
169
170 """
171 pass
172
173
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000174def test_password_manager_default_port(self):
175 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000176 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000177 >>> add = mgr.add_password
178
179 The point to note here is that we can't guess the default port if there's
180 no scheme. This applies to both add_password and find_user_password.
181
182 >>> add("f", "http://g.example.com:80", "10", "j")
183 >>> add("g", "http://h.example.com", "11", "k")
184 >>> add("h", "i.example.com:80", "12", "l")
185 >>> add("i", "j.example.com", "13", "m")
186 >>> mgr.find_user_password("f", "g.example.com:100")
187 (None, None)
188 >>> mgr.find_user_password("f", "g.example.com:80")
189 ('10', 'j')
190 >>> mgr.find_user_password("f", "g.example.com")
191 (None, None)
192 >>> mgr.find_user_password("f", "http://g.example.com:100")
193 (None, None)
194 >>> mgr.find_user_password("f", "http://g.example.com:80")
195 ('10', 'j')
196 >>> mgr.find_user_password("f", "http://g.example.com")
197 ('10', 'j')
198 >>> mgr.find_user_password("g", "h.example.com")
199 ('11', 'k')
200 >>> mgr.find_user_password("g", "h.example.com:80")
201 ('11', 'k')
202 >>> mgr.find_user_password("g", "http://h.example.com:80")
203 ('11', 'k')
204 >>> mgr.find_user_password("h", "i.example.com")
205 (None, None)
206 >>> mgr.find_user_password("h", "i.example.com:80")
207 ('12', 'l')
208 >>> mgr.find_user_password("h", "http://i.example.com:80")
209 ('12', 'l')
210 >>> mgr.find_user_password("i", "j.example.com")
211 ('13', 'm')
212 >>> mgr.find_user_password("i", "j.example.com:80")
213 (None, None)
214 >>> mgr.find_user_password("i", "http://j.example.com")
215 ('13', 'm')
216 >>> mgr.find_user_password("i", "http://j.example.com:80")
217 (None, None)
218
219 """
220
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000221class MockOpener:
222 addheaders = []
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000223 def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
224 self.req, self.data, self.timeout = req, data, timeout
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000225 def error(self, proto, *args):
226 self.proto, self.args = proto, args
227
228class MockFile:
229 def read(self, count=None): pass
230 def readline(self, count=None): pass
231 def close(self): pass
232
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000233class MockHeaders(dict):
234 def getheaders(self, name):
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000235 return list(self.values())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000236
Guido van Rossum34d19282007-08-09 01:03:29 +0000237class MockResponse(io.StringIO):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000238 def __init__(self, code, msg, headers, data, url=None):
Guido van Rossum34d19282007-08-09 01:03:29 +0000239 io.StringIO.__init__(self, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000240 self.code, self.msg, self.headers, self.url = code, msg, headers, url
241 def info(self):
242 return self.headers
243 def geturl(self):
244 return self.url
245
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000246class MockCookieJar:
247 def add_cookie_header(self, request):
248 self.ach_req = request
249 def extract_cookies(self, response, request):
250 self.ec_req, self.ec_r = request, response
251
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000252class FakeMethod:
253 def __init__(self, meth_name, action, handle):
254 self.meth_name = meth_name
255 self.handle = handle
256 self.action = action
257 def __call__(self, *args):
258 return self.handle(self.meth_name, self.action, *args)
259
Senthil Kumaran47fff872009-12-20 07:10:31 +0000260class MockHTTPResponse(io.IOBase):
261 def __init__(self, fp, msg, status, reason):
262 self.fp = fp
263 self.msg = msg
264 self.status = status
265 self.reason = reason
266 self.code = 200
267
268 def read(self):
269 return ''
270
271 def info(self):
272 return {}
273
274 def geturl(self):
275 return self.url
276
277
278class MockHTTPClass:
279 def __init__(self):
280 self.level = 0
281 self.req_headers = []
282 self.data = None
283 self.raise_on_endheaders = False
284 self._tunnel_headers = {}
285
286 def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
287 self.host = host
288 self.timeout = timeout
289 return self
290
291 def set_debuglevel(self, level):
292 self.level = level
293
294 def set_tunnel(self, host, port=None, headers=None):
295 self._tunnel_host = host
296 self._tunnel_port = port
297 if headers:
298 self._tunnel_headers = headers
299 else:
300 self._tunnel_headers.clear()
301
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000302 def request(self, method, url, body=None, headers=None):
Senthil Kumaran47fff872009-12-20 07:10:31 +0000303 self.method = method
304 self.selector = url
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000305 if headers is not None:
306 self.req_headers += headers.items()
Senthil Kumaran47fff872009-12-20 07:10:31 +0000307 self.req_headers.sort()
308 if body:
309 self.data = body
310 if self.raise_on_endheaders:
311 import socket
312 raise socket.error()
313 def getresponse(self):
314 return MockHTTPResponse(MockFile(), {}, 200, "OK")
315
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000316class MockHandler:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000317 # useful for testing handler machinery
318 # see add_ordered_mock_handlers() docstring
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000319 handler_order = 500
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000320 def __init__(self, methods):
321 self._define_methods(methods)
322 def _define_methods(self, methods):
323 for spec in methods:
324 if len(spec) == 2: name, action = spec
325 else: name, action = spec, None
326 meth = FakeMethod(name, action, self.handle)
327 setattr(self.__class__, name, meth)
328 def handle(self, fn_name, action, *args, **kwds):
329 self.parent.calls.append((self, fn_name, args, kwds))
330 if action is None:
331 return None
332 elif action == "return self":
333 return self
334 elif action == "return response":
335 res = MockResponse(200, "OK", {}, "")
336 return res
337 elif action == "return request":
338 return Request("http://blah/")
339 elif action.startswith("error"):
340 code = action[action.rfind(" ")+1:]
341 try:
342 code = int(code)
343 except ValueError:
344 pass
345 res = MockResponse(200, "OK", {}, "")
346 return self.parent.error("http", args[0], res, code, "", {})
347 elif action == "raise":
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000348 raise urllib.error.URLError("blah")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000349 assert False
350 def close(self): pass
351 def add_parent(self, parent):
352 self.parent = parent
353 self.parent.calls = []
354 def __lt__(self, other):
355 if not hasattr(other, "handler_order"):
356 # No handler_order, leave in original order. Yuck.
357 return True
358 return self.handler_order < other.handler_order
359
360def add_ordered_mock_handlers(opener, meth_spec):
361 """Create MockHandlers and add them to an OpenerDirector.
362
363 meth_spec: list of lists of tuples and strings defining methods to define
364 on handlers. eg:
365
366 [["http_error", "ftp_open"], ["http_open"]]
367
368 defines methods .http_error() and .ftp_open() on one handler, and
369 .http_open() on another. These methods just record their arguments and
370 return None. Using a tuple instead of a string causes the method to
371 perform some action (see MockHandler.handle()), eg:
372
373 [["http_error"], [("http_open", "return request")]]
374
375 defines .http_error() on one handler (which simply returns None), and
376 .http_open() on another handler, which returns a Request object.
377
378 """
379 handlers = []
380 count = 0
381 for meths in meth_spec:
382 class MockHandlerSubclass(MockHandler): pass
383 h = MockHandlerSubclass(meths)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000384 h.handler_order += count
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000385 h.add_parent(opener)
386 count = count + 1
387 handlers.append(h)
388 opener.add_handler(h)
389 return handlers
390
Thomas Wouters477c8d52006-05-27 19:21:47 +0000391def build_test_opener(*handler_instances):
392 opener = OpenerDirector()
393 for h in handler_instances:
394 opener.add_handler(h)
395 return opener
396
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000397class MockHTTPHandler(urllib.request.BaseHandler):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000398 # useful for testing redirections and auth
399 # sends supplied headers and code as first response
400 # sends 200 OK as second response
401 def __init__(self, code, headers):
402 self.code = code
403 self.headers = headers
404 self.reset()
405 def reset(self):
406 self._count = 0
407 self.requests = []
408 def http_open(self, req):
Barry Warsaw820c1202008-06-12 04:06:45 +0000409 import email, http.client, copy
Guido van Rossum34d19282007-08-09 01:03:29 +0000410 from io import StringIO
Thomas Wouters477c8d52006-05-27 19:21:47 +0000411 self.requests.append(copy.deepcopy(req))
412 if self._count == 0:
413 self._count = self._count + 1
Georg Brandl24420152008-05-26 16:32:26 +0000414 name = http.client.responses[self.code]
Barry Warsaw820c1202008-06-12 04:06:45 +0000415 msg = email.message_from_string(self.headers)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000416 return self.parent.error(
417 "http", req, MockFile(), self.code, name, msg)
418 else:
419 self.req = req
Barry Warsaw820c1202008-06-12 04:06:45 +0000420 msg = email.message_from_string("\r\n\r\n")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000421 return MockResponse(200, "OK", msg, "", req.get_full_url())
422
Senthil Kumaran47fff872009-12-20 07:10:31 +0000423class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
424 # Useful for testing the Proxy-Authorization request by verifying the
425 # properties of httpcon
Benjamin Peterson3d5b8db2009-12-24 01:14:05 +0000426
427 def __init__(self):
428 urllib.request.AbstractHTTPHandler.__init__(self)
429 self.httpconn = MockHTTPClass()
430
Senthil Kumaran47fff872009-12-20 07:10:31 +0000431 def https_open(self, req):
432 return self.do_open(self.httpconn, req)
433
Thomas Wouters477c8d52006-05-27 19:21:47 +0000434class MockPasswordManager:
435 def add_password(self, realm, uri, user, password):
436 self.realm = realm
437 self.url = uri
438 self.user = user
439 self.password = password
440 def find_user_password(self, realm, authuri):
441 self.target_realm = realm
442 self.target_url = authuri
443 return self.user, self.password
444
445
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000446class OpenerDirectorTests(unittest.TestCase):
447
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000448 def test_add_non_handler(self):
449 class NonHandler(object):
450 pass
451 self.assertRaises(TypeError,
452 OpenerDirector().add_handler, NonHandler())
453
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000454 def test_badly_named_methods(self):
455 # test work-around for three methods that accidentally follow the
456 # naming conventions for handler methods
457 # (*_open() / *_request() / *_response())
458
459 # These used to call the accidentally-named methods, causing a
460 # TypeError in real code; here, returning self from these mock
461 # methods would either cause no exception, or AttributeError.
462
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000463 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000464
465 o = OpenerDirector()
466 meth_spec = [
467 [("do_open", "return self"), ("proxy_open", "return self")],
468 [("redirect_request", "return self")],
469 ]
470 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000471 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000472 for scheme in "do", "proxy", "redirect":
473 self.assertRaises(URLError, o.open, scheme+"://example.com/")
474
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000475 def test_handled(self):
476 # handler returning non-None means no more handlers will be called
477 o = OpenerDirector()
478 meth_spec = [
479 ["http_open", "ftp_open", "http_error_302"],
480 ["ftp_open"],
481 [("http_open", "return self")],
482 [("http_open", "return self")],
483 ]
484 handlers = add_ordered_mock_handlers(o, meth_spec)
485
486 req = Request("http://example.com/")
487 r = o.open(req)
488 # Second .http_open() gets called, third doesn't, since second returned
489 # non-None. Handlers without .http_open() never get any methods called
490 # on them.
491 # In fact, second mock handler defining .http_open() returns self
492 # (instead of response), which becomes the OpenerDirector's return
493 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000494 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000495 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
496 for expected, got in zip(calls, o.calls):
497 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000498 self.assertEqual((handler, name), expected)
499 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000500
501 def test_handler_order(self):
502 o = OpenerDirector()
503 handlers = []
504 for meths, handler_order in [
505 ([("http_open", "return self")], 500),
506 (["http_open"], 0),
507 ]:
508 class MockHandlerSubclass(MockHandler): pass
509 h = MockHandlerSubclass(meths)
510 h.handler_order = handler_order
511 handlers.append(h)
512 o.add_handler(h)
513
514 r = o.open("http://example.com/")
515 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000516 self.assertEqual(o.calls[0][0], handlers[1])
517 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000518
519 def test_raise(self):
520 # raising URLError stops processing of request
521 o = OpenerDirector()
522 meth_spec = [
523 [("http_open", "raise")],
524 [("http_open", "return self")],
525 ]
526 handlers = add_ordered_mock_handlers(o, meth_spec)
527
528 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000529 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000530 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000531
532## def test_error(self):
533## # XXX this doesn't actually seem to be used in standard library,
534## # but should really be tested anyway...
535
536 def test_http_error(self):
537 # XXX http_error_default
538 # http errors are a special case
539 o = OpenerDirector()
540 meth_spec = [
541 [("http_open", "error 302")],
542 [("http_error_400", "raise"), "http_open"],
543 [("http_error_302", "return response"), "http_error_303",
544 "http_error"],
545 [("http_error_302")],
546 ]
547 handlers = add_ordered_mock_handlers(o, meth_spec)
548
549 class Unknown:
550 def __eq__(self, other): return True
551
552 req = Request("http://example.com/")
553 r = o.open(req)
554 assert len(o.calls) == 2
555 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000556 (handlers[2], "http_error_302",
557 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000558 for expected, got in zip(calls, o.calls):
559 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000560 self.assertEqual((handler, method_name), got[:2])
561 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000562
563 def test_processors(self):
564 # *_request / *_response methods get called appropriately
565 o = OpenerDirector()
566 meth_spec = [
567 [("http_request", "return request"),
568 ("http_response", "return response")],
569 [("http_request", "return request"),
570 ("http_response", "return response")],
571 ]
572 handlers = add_ordered_mock_handlers(o, meth_spec)
573
574 req = Request("http://example.com/")
575 r = o.open(req)
576 # processor methods are called on *all* handlers that define them,
577 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000578 calls = [
579 (handlers[0], "http_request"), (handlers[1], "http_request"),
580 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000581
582 for i, (handler, name, args, kwds) in enumerate(o.calls):
583 if i < 2:
584 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000585 self.assertEqual((handler, name), calls[i])
586 self.assertEqual(len(args), 1)
Ezio Melottie9615932010-01-24 19:26:24 +0000587 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000588 else:
589 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000590 self.assertEqual((handler, name), calls[i])
591 self.assertEqual(len(args), 2)
Ezio Melottie9615932010-01-24 19:26:24 +0000592 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000593 # response from opener.open is None, because there's no
594 # handler that defines http_open to handle it
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000595 self.assertTrue(args[1] is None or
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000596 isinstance(args[1], MockResponse))
597
598
Tim Peters58eb11c2004-01-18 20:29:55 +0000599def sanepathname2url(path):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000600 urlpath = urllib.request.pathname2url(path)
Tim Peters58eb11c2004-01-18 20:29:55 +0000601 if os.name == "nt" and urlpath.startswith("///"):
602 urlpath = urlpath[2:]
603 # XXX don't ask me about the mac...
604 return urlpath
605
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000606class HandlerTests(unittest.TestCase):
607
608 def test_ftp(self):
609 class MockFTPWrapper:
610 def __init__(self, data): self.data = data
611 def retrfile(self, filename, filetype):
612 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000613 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000614
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000615 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000617 def connect_ftp(self, user, passwd, host, port, dirs,
618 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000619 self.user, self.passwd = user, passwd
620 self.host, self.port = host, port
621 self.dirs = dirs
622 self.ftpwrapper = MockFTPWrapper(self.data)
623 return self.ftpwrapper
624
Georg Brandlf78e02b2008-06-10 17:40:04 +0000625 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000626 data = "rheum rhaponicum"
627 h = NullFTPHandler(data)
628 o = h.parent = MockOpener()
629
630 for url, host, port, type_, dirs, filename, mimetype in [
631 ("ftp://localhost/foo/bar/baz.html",
632 "localhost", ftplib.FTP_PORT, "I",
633 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000634 ("ftp://localhost:80/foo/bar/",
635 "localhost", 80, "D",
636 ["foo", "bar"], "", None),
637 ("ftp://localhost/baz.gif;type=a",
638 "localhost", ftplib.FTP_PORT, "A",
639 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000640 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000641 req = Request(url)
642 req.timeout = None
643 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000644 # ftp authentication not yet implemented by FTPHandler
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000645 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000646 self.assertEqual(h.host, socket.gethostbyname(host))
647 self.assertEqual(h.port, port)
648 self.assertEqual(h.dirs, dirs)
649 self.assertEqual(h.ftpwrapper.filename, filename)
650 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000652 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000653 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000654
655 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000656 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000657 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000658 o = h.parent = MockOpener()
659
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000660 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000661 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000662 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000663 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000664 "file://localhost%s" % urlpath,
665 "file://%s" % urlpath,
666 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000667 ]
668 try:
669 localaddr = socket.gethostbyname(socket.gethostname())
670 except socket.gaierror:
671 localaddr = ''
672 if localaddr:
673 urls.append("file://%s%s" % (localaddr, urlpath))
674
675 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000676 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000677 try:
678 try:
679 f.write(towrite)
680 finally:
681 f.close()
682
683 r = h.file_open(Request(url))
684 try:
685 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000686 headers = r.info()
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000687 respurl = r.geturl()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000688 finally:
689 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000690 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000691 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000692 finally:
693 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000694 self.assertEqual(data, towrite)
695 self.assertEqual(headers["Content-type"], "text/plain")
696 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000697 self.assertEqual(headers["Last-modified"], modified)
Senthil Kumaran4fbed102010-05-08 03:29:09 +0000698 self.assertEqual(respurl, url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000699
700 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000701 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000702 "file:///file_does_not_exist.txt",
703 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
704 os.getcwd(), TESTFN),
705 "file://somerandomhost.ontheinternet.com%s/%s" %
706 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000707 ]:
708 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000709 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000710 try:
711 f.write(towrite)
712 finally:
713 f.close()
714
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000715 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000716 h.file_open, Request(url))
717 finally:
718 os.remove(TESTFN)
719
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000720 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000721 o = h.parent = MockOpener()
722 # XXXX why does // mean ftp (and /// mean not ftp!), and where
723 # is file: scheme specified? I think this is really a bug, and
724 # what was intended was to distinguish between URLs like:
725 # file:/blah.txt (a file)
726 # file://localhost/blah.txt (a file)
727 # file:///blah.txt (a file)
728 # file://ftp.example.com/blah.txt (an ftp URL)
729 for url, ftp in [
730 ("file://ftp.example.com//foo.txt", True),
731 ("file://ftp.example.com///foo.txt", False),
732# XXXX bug: fails with OSError, should be URLError
733 ("file://ftp.example.com/foo.txt", False),
734 ]:
735 req = Request(url)
736 try:
737 h.file_open(req)
738 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000739 except (urllib.error.URLError, OSError):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000740 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000741 else:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000742 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000743 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000744
745 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000746
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000747 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000748 o = h.parent = MockOpener()
749
750 url = "http://example.com/"
751 for method, data in [("GET", None), ("POST", "blah")]:
752 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000753 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000754 req.add_unredirected_header("Spam", "eggs")
755 http = MockHTTPClass()
756 r = h.do_open(http, req)
757
758 # result attributes
759 r.read; r.readline # wrapped MockFile methods
760 r.info; r.geturl # addinfourl methods
761 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
762 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000763 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000764 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000765
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000766 self.assertEqual(http.host, "example.com")
767 self.assertEqual(http.level, 0)
768 self.assertEqual(http.method, method)
769 self.assertEqual(http.selector, "/")
770 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000771 [("Connection", "close"),
772 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000773 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000774
775 # check socket.error converted to URLError
776 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000777 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000778
779 # check adding of standard headers
780 o.addheaders = [("Spam", "eggs")]
781 for data in "", None: # POST, GET
782 req = Request("http://example.com/", data)
783 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000784 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000785 if data is None: # GET
Benjamin Peterson577473f2010-01-19 00:09:57 +0000786 self.assertNotIn("Content-length", req.unredirected_hdrs)
787 self.assertNotIn("Content-type", req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000788 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000789 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
790 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000791 "application/x-www-form-urlencoded")
792 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000793 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
794 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000795
796 # don't clobber existing headers
797 req.add_unredirected_header("Content-length", "foo")
798 req.add_unredirected_header("Content-type", "bar")
799 req.add_unredirected_header("Host", "baz")
800 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000801 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000802 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
803 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000804 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
805 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000806
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000807 def test_http_doubleslash(self):
808 # Checks the presence of any unnecessary double slash in url does not
809 # break anything. Previously, a double slash directly after the host
810 # could could cause incorrect parsing.
811 h = urllib.request.AbstractHTTPHandler()
812 o = h.parent = MockOpener()
813
814 data = ""
815 ds_urls = [
816 "http://example.com/foo/bar/baz.html",
817 "http://example.com//foo/bar/baz.html",
818 "http://example.com/foo//bar/baz.html",
819 "http://example.com/foo/bar//baz.html"
820 ]
821
822 for ds_url in ds_urls:
823 ds_req = Request(ds_url, data)
824
825 # Check whether host is determined correctly if there is no proxy
826 np_ds_req = h.do_request_(ds_req)
827 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
828
829 # Check whether host is determined correctly if there is a proxy
830 ds_req.set_proxy("someproxy:3128",None)
831 p_ds_req = h.do_request_(ds_req)
832 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
833
834
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000835 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000836 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000837 o = h.parent = MockOpener()
838
839 url = "http://example.com/"
840 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000841 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000842 r = MockResponse(200, "OK", {}, "", url)
843 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000844 self.assertTrue(r is newr)
845 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846 r = MockResponse(202, "Accepted", {}, "", url)
847 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000848 self.assertTrue(r is newr)
849 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000850 r = MockResponse(206, "Partial content", {}, "", url)
851 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000852 self.assertTrue(r is newr)
853 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000854 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000855 r = MockResponse(502, "Bad gateway", {}, "", url)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000856 self.assertTrue(h.http_response(req, r) is None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000857 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000858 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000859
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000860 def test_cookies(self):
861 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000862 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000863 o = h.parent = MockOpener()
864
865 req = Request("http://example.com/")
866 r = MockResponse(200, "OK", {}, "")
867 newreq = h.http_request(req)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000868 self.assertTrue(cj.ach_req is req is newreq)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000869 self.assertEquals(req.get_origin_req_host(), "example.com")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000870 self.assertTrue(not req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000871 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000872 self.assertTrue(cj.ec_req is req)
873 self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000874
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000875 def test_redirect(self):
876 from_url = "http://example.com/a.html"
877 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000878 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000879 o = h.parent = MockOpener()
880
881 # ordinary redirect behaviour
882 for code in 301, 302, 303, 307:
883 for data in None, "blah\nblah\n":
884 method = getattr(h, "http_error_%s" % code)
885 req = Request(from_url, data)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000886 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000887 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +0000888 if data is not None:
889 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000890 req.add_unredirected_header("Spam", "spam")
891 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000892 method(req, MockFile(), code, "Blah",
893 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000894 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000895 # 307 in response to POST requires user OK
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000896 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000897 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000898 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000899 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000900 except AttributeError:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000901 self.assertTrue(not o.req.has_data())
Christian Heimes77c02eb2008-02-09 02:18:51 +0000902
903 # now it's a GET, there should not be headers regarding content
904 # (possibly dragged from before being a POST)
905 headers = [x.lower() for x in o.req.headers]
Benjamin Peterson577473f2010-01-19 00:09:57 +0000906 self.assertNotIn("content-length", headers)
907 self.assertNotIn("content-type", headers)
Christian Heimes77c02eb2008-02-09 02:18:51 +0000908
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000909 self.assertEqual(o.req.headers["Nonsense"],
910 "viking=withhold")
Benjamin Peterson577473f2010-01-19 00:09:57 +0000911 self.assertNotIn("Spam", o.req.headers)
912 self.assertNotIn("Spam", o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000913
914 # loop detection
915 req = Request(from_url)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000916 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000917 def redirect(h, req, url=to_url):
918 h.http_error_302(req, MockFile(), 302, "Blah",
919 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000920 # Note that the *original* request shares the same record of
921 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000922
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000923 # detect infinite loop redirect of a URL to itself
924 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000925 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000926 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000927 try:
928 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000929 redirect(h, req, "http://example.com/")
930 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000931 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000932 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000933 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000934
935 # detect endless non-repeating chain of redirects
936 req = Request(from_url, origin_req_host="example.com")
937 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000938 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000939 try:
940 while 1:
941 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000942 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000943 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000944 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000945 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000946
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000947 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000948 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +0000949 from http.cookiejar import CookieJar
950 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000951
952 cj = CookieJar()
953 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000954 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000955 hdeh = urllib.request.HTTPDefaultErrorHandler()
956 hrh = urllib.request.HTTPRedirectHandler()
957 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000958 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000959 o.open("http://www.example.com/")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000960 self.assertTrue(not hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000961
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000962 def test_proxy(self):
963 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000964 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000965 o.add_handler(ph)
966 meth_spec = [
967 [("http_open", "return response")]
968 ]
969 handlers = add_ordered_mock_handlers(o, meth_spec)
970
971 req = Request("http://acme.example.com/")
972 self.assertEqual(req.get_host(), "acme.example.com")
973 r = o.open(req)
974 self.assertEqual(req.get_host(), "proxy.example.com:3128")
975
976 self.assertEqual([(handlers[0], "http_open")],
977 [tup[0:2] for tup in o.calls])
978
Senthil Kumaran7bb04972009-10-11 04:58:55 +0000979 def test_proxy_no_proxy(self):
980 os.environ['no_proxy'] = 'python.org'
981 o = OpenerDirector()
982 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
983 o.add_handler(ph)
984 req = Request("http://www.perl.org/")
985 self.assertEqual(req.get_host(), "www.perl.org")
986 r = o.open(req)
987 self.assertEqual(req.get_host(), "proxy.example.com")
988 req = Request("http://www.python.org")
989 self.assertEqual(req.get_host(), "www.python.org")
990 r = o.open(req)
991 self.assertEqual(req.get_host(), "www.python.org")
992 del os.environ['no_proxy']
993
994
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000995 def test_proxy_https(self):
996 o = OpenerDirector()
997 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
998 o.add_handler(ph)
999 meth_spec = [
1000 [("https_open", "return response")]
1001 ]
1002 handlers = add_ordered_mock_handlers(o, meth_spec)
1003
1004 req = Request("https://www.example.com/")
1005 self.assertEqual(req.get_host(), "www.example.com")
1006 r = o.open(req)
1007 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1008 self.assertEqual([(handlers[0], "https_open")],
1009 [tup[0:2] for tup in o.calls])
1010
Senthil Kumaran47fff872009-12-20 07:10:31 +00001011 def test_proxy_https_proxy_authorization(self):
1012 o = OpenerDirector()
1013 ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
1014 o.add_handler(ph)
1015 https_handler = MockHTTPSHandler()
1016 o.add_handler(https_handler)
1017 req = Request("https://www.example.com/")
1018 req.add_header("Proxy-Authorization","FooBar")
1019 req.add_header("User-Agent","Grail")
1020 self.assertEqual(req.get_host(), "www.example.com")
1021 self.assertIsNone(req._tunnel_host)
1022 r = o.open(req)
1023 # Verify Proxy-Authorization gets tunneled to request.
1024 # httpsconn req_headers do not have the Proxy-Authorization header but
1025 # the req will have.
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001026 self.assertNotIn(("Proxy-Authorization","FooBar"),
Senthil Kumaran47fff872009-12-20 07:10:31 +00001027 https_handler.httpconn.req_headers)
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001028 self.assertIn(("User-Agent","Grail"),
1029 https_handler.httpconn.req_headers)
Senthil Kumaran47fff872009-12-20 07:10:31 +00001030 self.assertIsNotNone(req._tunnel_host)
1031 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1032 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001033
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001034 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001035 opener = OpenerDirector()
1036 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001037 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001038 realm = "ACME Widget Store"
1039 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001040 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1041 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001042 opener.add_handler(auth_handler)
1043 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001044 self._test_basic_auth(opener, auth_handler, "Authorization",
1045 realm, http_handler, password_manager,
1046 "http://acme.example.com/protected",
1047 "http://acme.example.com/protected",
1048 )
1049
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001050 def test_basic_auth_with_single_quoted_realm(self):
1051 self.test_basic_auth(quote_char="'")
1052
Thomas Wouters477c8d52006-05-27 19:21:47 +00001053 def test_proxy_basic_auth(self):
1054 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001055 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001056 opener.add_handler(ph)
1057 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001058 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 realm = "ACME Networks"
1060 http_handler = MockHTTPHandler(
1061 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001062 opener.add_handler(auth_handler)
1063 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001064 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001065 realm, http_handler, password_manager,
1066 "http://acme.example.com:3128/protected",
1067 "proxy.example.com:3128",
1068 )
1069
1070 def test_basic_and_digest_auth_handlers(self):
1071 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1072 # response (http://python.org/sf/1479302), where it should instead
1073 # return None to allow another handler (especially
1074 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001075
1076 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1077 # try digest first (since it's the strongest auth scheme), so we record
1078 # order of calls here to check digest comes first:
1079 class RecordingOpenerDirector(OpenerDirector):
1080 def __init__(self):
1081 OpenerDirector.__init__(self)
1082 self.recorded = []
1083 def record(self, info):
1084 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001085 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001086 def http_error_401(self, *args, **kwds):
1087 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001088 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001089 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001090 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001091 def http_error_401(self, *args, **kwds):
1092 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001093 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001094 *args, **kwds)
1095
1096 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001097 password_manager = MockPasswordManager()
1098 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001099 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001100 realm = "ACME Networks"
1101 http_handler = MockHTTPHandler(
1102 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001103 opener.add_handler(basic_handler)
1104 opener.add_handler(digest_handler)
1105 opener.add_handler(http_handler)
1106
1107 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001108 self._test_basic_auth(opener, basic_handler, "Authorization",
1109 realm, http_handler, password_manager,
1110 "http://acme.example.com/protected",
1111 "http://acme.example.com/protected",
1112 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001113 # check digest was tried before basic (twice, because
1114 # _test_basic_auth called .open() twice)
1115 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001116
1117 def _test_basic_auth(self, opener, auth_handler, auth_header,
1118 realm, http_handler, password_manager,
1119 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001120 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001121 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001122
1123 # .add_password() fed through to password manager
1124 auth_handler.add_password(realm, request_url, user, password)
1125 self.assertEqual(realm, password_manager.realm)
1126 self.assertEqual(request_url, password_manager.url)
1127 self.assertEqual(user, password_manager.user)
1128 self.assertEqual(password, password_manager.password)
1129
1130 r = opener.open(request_url)
1131
1132 # should have asked the password manager for the username/password
1133 self.assertEqual(password_manager.target_realm, realm)
1134 self.assertEqual(password_manager.target_url, protected_url)
1135
1136 # expect one request without authorization, then one with
1137 self.assertEqual(len(http_handler.requests), 2)
1138 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001139 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001140 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001141 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001142 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1143 auth_hdr_value)
Senthil Kumaranca2fc9e2010-02-24 16:53:16 +00001144 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1145 auth_hdr_value)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001146 # if the password manager can't find a password, the handler won't
1147 # handle the HTTP auth error
1148 password_manager.user = password_manager.password = None
1149 http_handler.reset()
1150 r = opener.open(request_url)
1151 self.assertEqual(len(http_handler.requests), 1)
1152 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1153
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001154class MiscTests(unittest.TestCase):
1155
1156 def test_build_opener(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001157 class MyHTTPHandler(urllib.request.HTTPHandler): pass
1158 class FooHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001159 def foo_open(self): pass
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001160 class BarHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001161 def bar_open(self): pass
1162
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001163 build_opener = urllib.request.build_opener
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001164
1165 o = build_opener(FooHandler, BarHandler)
1166 self.opener_has_handler(o, FooHandler)
1167 self.opener_has_handler(o, BarHandler)
1168
1169 # can take a mix of classes and instances
1170 o = build_opener(FooHandler, BarHandler())
1171 self.opener_has_handler(o, FooHandler)
1172 self.opener_has_handler(o, BarHandler)
1173
1174 # subclasses of default handlers override default handlers
1175 o = build_opener(MyHTTPHandler)
1176 self.opener_has_handler(o, MyHTTPHandler)
1177
1178 # a particular case of overriding: default handlers can be passed
1179 # in explicitly
1180 o = build_opener()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001181 self.opener_has_handler(o, urllib.request.HTTPHandler)
1182 o = build_opener(urllib.request.HTTPHandler)
1183 self.opener_has_handler(o, urllib.request.HTTPHandler)
1184 o = build_opener(urllib.request.HTTPHandler())
1185 self.opener_has_handler(o, urllib.request.HTTPHandler)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001186
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001187 # Issue2670: multiple handlers sharing the same base class
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001188 class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001189 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1190 self.opener_has_handler(o, MyHTTPHandler)
1191 self.opener_has_handler(o, MyOtherHTTPHandler)
1192
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001193 def opener_has_handler(self, opener, handler_class):
1194 for h in opener.handlers:
1195 if h.__class__ == handler_class:
1196 break
1197 else:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001198 self.assertTrue(False)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001199
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001200class RequestTests(unittest.TestCase):
1201
1202 def setUp(self):
1203 self.get = Request("http://www.python.org/~jeremy/")
1204 self.post = Request("http://www.python.org/~jeremy/",
1205 "data",
1206 headers={"X-Test": "test"})
1207
1208 def test_method(self):
1209 self.assertEqual("POST", self.post.get_method())
1210 self.assertEqual("GET", self.get.get_method())
1211
1212 def test_add_data(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001213 self.assertTrue(not self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001214 self.assertEqual("GET", self.get.get_method())
1215 self.get.add_data("spam")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001216 self.assertTrue(self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001217 self.assertEqual("POST", self.get.get_method())
1218
1219 def test_get_full_url(self):
1220 self.assertEqual("http://www.python.org/~jeremy/",
1221 self.get.get_full_url())
1222
1223 def test_selector(self):
1224 self.assertEqual("/~jeremy/", self.get.get_selector())
1225 req = Request("http://www.python.org/")
1226 self.assertEqual("/", req.get_selector())
1227
1228 def test_get_type(self):
1229 self.assertEqual("http", self.get.get_type())
1230
1231 def test_get_host(self):
1232 self.assertEqual("www.python.org", self.get.get_host())
1233
1234 def test_get_host_unquote(self):
1235 req = Request("http://www.%70ython.org/")
1236 self.assertEqual("www.python.org", req.get_host())
1237
1238 def test_proxy(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001239 self.assertTrue(not self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001240 self.get.set_proxy("www.perl.org", "http")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001241 self.assertTrue(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001242 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1243 self.assertEqual("www.perl.org", self.get.get_host())
1244
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001245
1246def test_main(verbose=None):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001247 from test import test_urllib2
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001248 support.run_doctest(test_urllib2, verbose)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001249 support.run_doctest(urllib.request, verbose)
Andrew M. Kuchlingbd3200f2004-06-29 13:15:46 +00001250 tests = (TrivialTests,
1251 OpenerDirectorTests,
1252 HandlerTests,
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001253 MiscTests,
1254 RequestTests)
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001255 support.run_unittest(*tests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001256
1257if __name__ == "__main__":
1258 test_main(verbose=True)