blob: 8093f0e2f227f3e7360b00c744f0f1311fdb64f6 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
# XXX
# Request
# CacheFTPHandler (hard to write)
# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and urllib.request.parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a malformed URL and can read a local file: URL."""
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]
        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')

        file_url = "file://%s" % fname
        f = urllib.request.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is irrelevant.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        for string, expected in tests:
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000047
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000048
def test_request_headers_dict():
    """
    Doctest carrier: case normalization of keys in the Request.headers dict.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
78
def test_request_headers_methods():
    """
    Doctest carrier: Request.has_header(), .header_items() and .get_header().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
109
110
def test_password_manager(self):
    """
    Doctest carrier: HTTPPasswordMgr lookups by realm, URI prefix and port.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM (the example below is disabled
    with a leading "##" so that doctest does not run it):
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
174
175
def test_password_manager_default_port(self):
    """
    Doctest carrier: HTTPPasswordMgr handling of default vs. explicit ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
222
class MockOpener:
    """Opener stand-in that only records the arguments it is called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Remember the request, payload and timeout of the latest call."""
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        """Remember the protocol and the remaining positional arguments."""
        self.proto = proto
        self.args = args
229
class MockFile:
    """File-like stub: every method accepts its arguments and does nothing."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
234
class MockHeaders(dict):
    """dict that additionally offers a getheaders() method."""

    def getheaders(self, name):
        # The requested name is ignored: all stored values are returned.
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000238
class MockResponse(io.StringIO):
    """In-memory text stream carrying the attributes of an HTTP response."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor (None by default)."""
        return self.url
247
class MockCookieJar:
    """Cookie-jar stub that records the objects passed to its two hooks."""

    def add_cookie_header(self, request):
        """Remember the request as ``ach_req``."""
        self.ach_req = request

    def extract_cookies(self, response, request):
        """Remember the request as ``ec_req`` and the response as ``ec_r``."""
        self.ec_req = request
        self.ec_r = response
253
class FakeMethod:
    """Callable that forwards to ``handle(meth_name, action, *args)``."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        dispatch = self.handle
        return dispatch(self.meth_name, self.action, *args)
261
class MockHandler:
    """Handler whose methods are synthesized from a spec and log their calls.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of *methods* on this class.

        An entry is either a method name (str), whose action is None, or a
        (name, action) pair.
        """
        for spec in methods:
            # Decide by type, not by len(): a two-character method name must
            # not be mistaken for a (name, action) pair.
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # Set on the class (not the instance) so that dir()-based method
            # discovery sees it; callers use a fresh subclass per handler.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Log the call on ``self.parent.calls``, then perform *action*.

        Raises urllib.error.URLError for the "raise" action and
        AssertionError for an unrecognized action.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            return MockResponse(200, "OK", {}, "")
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # "error 302" -> 302; a non-numeric suffix is passed on as a str.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Explicit raise rather than `assert False`, which vanishes under -O.
        raise AssertionError("unknown action: %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        """Attach to *parent* and reset its ``calls`` log to an empty list."""
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
305
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings defining methods to define
    on handlers.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in meth_spec order.

    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler keeps the synthesized methods, which
        # MockHandler sets on the class, from leaking between handlers.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later specs get a higher handler_order than earlier ones.
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
336
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for instance in handler_instances:
        director.add_handler(instance)
    return director
342
class MockHTTPHandler(urllib.request.BaseHandler):
    """HTTP handler useful for testing redirections and auth.

    The first http_open() call reports the supplied code and headers through
    the parent's error() method; later calls return a 200 OK MockResponse.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget recorded requests and start again at the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        """Record a deep copy of *req* and produce the next canned response."""
        import copy
        import email
        import http.client

        self.requests.append(copy.deepcopy(req))
        if self._count == 0:
            self._count = self._count + 1
            name = http.client.responses[self.code]
            msg = email.message_from_string(self.headers)
            return self.parent.error(
                "http", req, MockFile(), self.code, name, msg)
        else:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
368
class MockPasswordManager:
    """Password manager stub holding a single credential and logging lookups."""

    def add_password(self, realm, uri, user, password):
        """Store the one credential, replacing any earlier one."""
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        """Record what was asked for; return the stored (user, password)."""
        self.target_realm, self.target_url = realm, authuri
        credentials = (self.user, self.password)
        return credentials
379
380
class OpenerDirectorTests(unittest.TestCase):
    """Tests of how OpenerDirector dispatches to handler methods."""

    def test_add_non_handler(self):
        """add_handler() rejects an object that is not a handler."""
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib.error import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
        ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the count explicitly;
        # otherwise a missing or extra call would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        """Handlers are consulted in ascending handler_order."""
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            ["http_error_302"],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: placeholder for the response object.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # A unittest assertion, not a bare assert (which -O would strip).
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            # `args` stays on the left so that Unknown.__eq__ is the one used.
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
532
533
def sanepathname2url(path):
    """Convert *path* with pathname2url(), trimming "///" to "/" on Windows."""
    urlpath = urllib.request.pathname2url(path)
    trim_slashes = os.name == "nt" and urlpath.startswith("///")
    # XXX don't ask me about the mac...
    return urlpath[2:] if trim_slashes else urlpath
540
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000541class HandlerTests(unittest.TestCase):
542
543 def test_ftp(self):
544 class MockFTPWrapper:
545 def __init__(self, data): self.data = data
546 def retrfile(self, filename, filetype):
547 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000548 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000549
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000550 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000551 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000552 def connect_ftp(self, user, passwd, host, port, dirs,
553 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000554 self.user, self.passwd = user, passwd
555 self.host, self.port = host, port
556 self.dirs = dirs
557 self.ftpwrapper = MockFTPWrapper(self.data)
558 return self.ftpwrapper
559
Georg Brandlf78e02b2008-06-10 17:40:04 +0000560 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000561 data = "rheum rhaponicum"
562 h = NullFTPHandler(data)
563 o = h.parent = MockOpener()
564
565 for url, host, port, type_, dirs, filename, mimetype in [
566 ("ftp://localhost/foo/bar/baz.html",
567 "localhost", ftplib.FTP_PORT, "I",
568 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000569 ("ftp://localhost:80/foo/bar/",
570 "localhost", 80, "D",
571 ["foo", "bar"], "", None),
572 ("ftp://localhost/baz.gif;type=a",
573 "localhost", ftplib.FTP_PORT, "A",
574 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000575 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000576 req = Request(url)
577 req.timeout = None
578 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000579 # ftp authentication not yet implemented by FTPHandler
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000580 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000581 self.assertEqual(h.host, socket.gethostbyname(host))
582 self.assertEqual(h.port, port)
583 self.assertEqual(h.dirs, dirs)
584 self.assertEqual(h.ftpwrapper.filename, filename)
585 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000586 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000587 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000588 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000589
590 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000591 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000592 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000593 o = h.parent = MockOpener()
594
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000595 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000596 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000597 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000598 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000599 "file://localhost%s" % urlpath,
600 "file://%s" % urlpath,
601 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000602 ]
603 try:
604 localaddr = socket.gethostbyname(socket.gethostname())
605 except socket.gaierror:
606 localaddr = ''
607 if localaddr:
608 urls.append("file://%s%s" % (localaddr, urlpath))
609
610 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000611 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000612 try:
613 try:
614 f.write(towrite)
615 finally:
616 f.close()
617
618 r = h.file_open(Request(url))
619 try:
620 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000621 headers = r.info()
622 newurl = r.geturl()
623 finally:
624 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000625 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000626 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000627 finally:
628 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000629 self.assertEqual(data, towrite)
630 self.assertEqual(headers["Content-type"], "text/plain")
631 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000632 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000633
634 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000635 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000636 "file:///file_does_not_exist.txt",
637 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
638 os.getcwd(), TESTFN),
639 "file://somerandomhost.ontheinternet.com%s/%s" %
640 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000641 ]:
642 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000643 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000644 try:
645 f.write(towrite)
646 finally:
647 f.close()
648
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000649 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000650 h.file_open, Request(url))
651 finally:
652 os.remove(TESTFN)
653
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000654 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000655 o = h.parent = MockOpener()
656 # XXXX why does // mean ftp (and /// mean not ftp!), and where
657 # is file: scheme specified? I think this is really a bug, and
658 # what was intended was to distinguish between URLs like:
659 # file:/blah.txt (a file)
660 # file://localhost/blah.txt (a file)
661 # file:///blah.txt (a file)
662 # file://ftp.example.com/blah.txt (an ftp URL)
663 for url, ftp in [
664 ("file://ftp.example.com//foo.txt", True),
665 ("file://ftp.example.com///foo.txt", False),
666# XXXX bug: fails with OSError, should be URLError
667 ("file://ftp.example.com/foo.txt", False),
668 ]:
669 req = Request(url)
670 try:
671 h.file_open(req)
672 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000673 except (urllib.error.URLError, OSError):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000674 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000675 else:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000676 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000677 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000678
def test_http(self):
    """Check AbstractHTTPHandler.do_open() and do_request_() using mocks.

    do_open() is driven with a fake connection class so the method,
    selector, headers and body handed to the connection can be inspected.
    do_request_() is then checked for the headers it adds to a request
    and for not overwriting headers that are already present.
    """
    class MockHTTPResponse(io.IOBase):
        # Minimal stand-in for the object returned by getresponse().
        def __init__(self, fp, msg, status, reason):
            self.fp = fp
            self.msg = msg
            self.status = status
            self.reason = reason
            self.code = 200
        def read(self):
            return ''
        def info(self):
            return {}
        def geturl(self):
            return self.url

    class MockHTTPClass:
        # Plays both the connection class and the connection instance:
        # calling it records host/timeout and returns itself.
        def __init__(self):
            self.level = 0
            self.req_headers = []
            self.data = None
            self.raise_on_endheaders = False
        def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            self.host = host
            self.timeout = timeout
            return self
        def set_debuglevel(self, level):
            self.level = level
        def request(self, method, url, body=None, headers=None):
            self.method = method
            self.selector = url
            self.req_headers += (headers or {}).items()
            # sorted so the comparison below is order-independent
            self.req_headers.sort()
            if body:
                self.data = body
            if self.raise_on_endheaders:
                raise socket.error()
        def getresponse(self):
            return MockHTTPResponse(MockFile(), {}, 200, "OK")

    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", "blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes: touching them raises AttributeError if
        # any of them is missing
        r.read; r.readline
        r.info; r.geturl
        # These used to be written as a bare tuple expression, which
        # compared nothing; assert the values explicitly.
        self.assertEqual(r.code, 200)
        self.assertEqual(r.msg, "OK")
        hdrs = r.info()
        hdrs.get; hdrs.__contains__  # r.info() must be dict-like
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check socket.error converted to URLError
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in "", None:  # POST, GET
        req = Request("http://example.com/", data)
        h.do_request_(req)
        if data is None:  # GET
            self.assertNotIn("Content-length", req.unredirected_hdrs)
            self.assertNotIn("Content-type", req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000777
def test_http_doubleslash(self):
    """A stray double slash in the URL path must not affect the Host header.

    Previously, a double slash directly after the host could cause
    incorrect parsing.
    """
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    body = ""
    candidate_urls = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for candidate in candidate_urls:
        request = Request(candidate, body)

        # Host must be determined correctly when there is no proxy ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and also once a proxy has been set on the request.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
804
805
def test_errors(self):
    """HTTPErrorProcessor returns 2xx responses as-is and reports others."""
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    request = Request(url)

    # all 2xx are passed through
    passed_through = (
        (200, "OK"),
        (202, "Accepted"),
        (206, "Partial content"),
    )
    for status, reason in passed_through:
        response = MockResponse(status, reason, {}, "", url)
        self.assertIs(processor.http_response(request, response), response)
        # opener.error not called
        self.assertFalse(hasattr(opener, "proto"))

    # anything else calls opener.error (and MockOpener returns None, here)
    failure = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(request, failure))
    self.assertEqual(opener.proto, "http")  # opener.error called
    self.assertEqual(opener.args,
                     (request, failure, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000830
def test_cookies(self):
    """HTTPCookieProcessor passes requests and responses to its cookie jar.

    The mock jar's ach_req / ec_req / ec_r attributes are compared with
    the objects given to http_request() and http_response().
    """
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")

    newreq = h.http_request(req)
    # the very same request object reaches the jar and is returned
    self.assertIs(cj.ach_req, req)
    self.assertIs(newreq, req)
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())

    newr = h.http_response(req, r)
    self.assertIs(cj.ec_req, req)
    # the very same response object reaches the jar and is returned
    self.assertIs(cj.ec_r, r)
    self.assertIs(newr, r)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000845
def test_redirect(self):
    """Check HTTPRedirectHandler's request rewriting and its loop limits."""
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    handler = urllib.request.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()

    # ordinary redirect behaviour
    for code in (301, 302, 303, 307):
        for data in (None, "blah\nblah\n"):
            error_method = getattr(handler, "http_error_%s" % code)
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                error_method(req, MockFile(), code, "Blah",
                             MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)

            # the request most recently handed to the opener
            followed = opener.req
            self.assertEqual(followed.get_full_url(), to_url)
            try:
                self.assertEqual(followed.get_method(), "GET")
            except AttributeError:
                self.assertFalse(followed.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            lowered = [name.lower() for name in followed.headers]
            self.assertNotIn("content-length", lowered)
            self.assertNotIn("content-type", lowered)

            self.assertEqual(followed.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", followed.headers)
            self.assertNotIn("Spam", followed.unredirected_hdrs)

    # loop detection
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    def redirects_before_error(url_for):
        # Keep redirecting one request to url_for(n) until the handler
        # raises HTTPError; return how many redirects were accepted.
        req = Request(from_url, origin_req_host="example.com")
        accepted = 0
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        try:
            while True:
                redirect(handler, req, url_for(accepted))
                accepted += 1
        except urllib.error.HTTPError:
            return accepted

    # detect infinite loop redirect of a URL to itself
    # don't stop until max_repeats, because cookies may introduce state
    self.assertEqual(
        redirects_before_error(lambda n: "http://example.com/"),
        urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    self.assertEqual(
        redirects_before_error(lambda n: "http://example.com/%d" % n),
        urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000917
def test_cookie_redirect(self):
    """Cookies for the original host must not follow a redirect elsewhere."""
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    http_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        http_handler,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # http_handler.req is checked for the absence of a Cookie header
    self.assertFalse(http_handler.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000932
def test_proxy(self):
    """ProxyHandler points an http request at the configured proxy host."""
    proxy = "proxy.example.com:3128"
    director = OpenerDirector()
    director.add_handler(urllib.request.ProxyHandler({"http": proxy}))
    mock_handlers = add_ordered_mock_handlers(
        director, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    director.open(request)
    # opening the request replaced its host with the proxy's
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # exactly one recorded call: http_open on the mock handler
    recorded = [call[0:2] for call in director.calls]
    self.assertEqual([(mock_handlers[0], "http_open")], recorded)
949
def test_proxy_https(self):
    """ProxyHandler points an https request at the configured proxy host."""
    proxy = "proxy.example.com:3128"
    director = OpenerDirector()
    director.add_handler(urllib.request.ProxyHandler({"https": proxy}))
    mock_handlers = add_ordered_mock_handlers(
        director, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    director.open(request)
    # opening the request replaced its host with the proxy's
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # exactly one recorded call: https_open on the mock handler
    recorded = [call[0:2] for call in director.calls]
    self.assertEqual([(mock_handlers[0], "https_open")], recorded)
965
966
def test_basic_auth(self, quote_char='"'):
    """Run the shared basic-auth scenario against a 401 challenge.

    quote_char is the character placed on both sides of the realm in
    the WWW-Authenticate header served by the mock HTTP handler.
    """
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))
    protected = "http://acme.example.com/protected"

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
982
Christian Heimes4fbc72b2008-03-22 00:47:35 +0000983 def test_basic_auth_with_single_quoted_realm(self):
984 self.test_basic_auth(quote_char="'")
985
def test_proxy_basic_auth(self):
    """Run the shared basic-auth scenario against a 407 proxy challenge."""
    realm = "ACME Networks"

    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler({"http": "proxy.example.com:3128"}))
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    # the password lookup is expected to be keyed on the proxy host
    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1002
def test_basic_and_digest_auth_handlers(self):
    """Digest auth is tried before basic auth and does not block it."""
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            super().__init__()
            self.recorded = []
        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            # result deliberately not returned, as before
            super().http_error_401(*args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            # result deliberately not returned, as before
            super().http_error_401(*args, **kwds)

    realm = "ACME Networks"
    protected = "http://acme.example.com/protected"

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    # basic is registered before digest on purpose: the call order
    # asserted below must not depend on registration order
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001049
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    """Shared scenario for the basic-auth tests.

    Registers a password through auth_handler, opens request_url with
    opener, and checks that http_handler saw one request without the
    auth_header followed by one carrying the expected Basic credentials.
    It then clears the stored credentials and checks that only a single,
    unauthenticated request is made.

    protected_url is the URL the password manager is expected to have
    been asked about.
    """
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    stored = (password_manager.realm, password_manager.url,
              password_manager.user, password_manager.password)
    for expected, actual in zip((realm, request_url, user, password),
                                stored):
        self.assertEqual(expected, actual)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    seen = http_handler.requests
    self.assertEqual(len(seen), 2)
    self.assertFalse(seen[0].has_header(auth_header))
    credentials = ('%s:%s' % (user, password)).encode("ascii")
    encoded = base64.encodebytes(credentials).strip().decode()
    self.assertEqual(seen[1].get_header(auth_header), 'Basic ' + encoded)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1085
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001086
class MiscTests(unittest.TestCase):
    """Tests for urllib.request.build_opener()."""

    def test_build_opener(self):
        """build_opener() accepts handler classes and instances."""
        class MyHTTPHandler(urllib.request.HTTPHandler): pass
        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self): pass
        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self): pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        """Fail unless opener holds a handler whose class is handler_class.

        The match is on the exact class; an instance of a subclass does
        not count.
        """
        for h in opener.handlers:
            if h.__class__ == handler_class:
                return
        # name the missing class instead of reporting a bare
        # "False is not true"
        self.fail("opener has no handler of class %r" % (handler_class,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001132
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001133class RequestTests(unittest.TestCase):
1134
1135 def setUp(self):
1136 self.get = Request("http://www.python.org/~jeremy/")
1137 self.post = Request("http://www.python.org/~jeremy/",
1138 "data",
1139 headers={"X-Test": "test"})
1140
1141 def test_method(self):
1142 self.assertEqual("POST", self.post.get_method())
1143 self.assertEqual("GET", self.get.get_method())
1144
1145 def test_add_data(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001146 self.assertTrue(not self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001147 self.assertEqual("GET", self.get.get_method())
1148 self.get.add_data("spam")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001149 self.assertTrue(self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001150 self.assertEqual("POST", self.get.get_method())
1151
1152 def test_get_full_url(self):
1153 self.assertEqual("http://www.python.org/~jeremy/",
1154 self.get.get_full_url())
1155
1156 def test_selector(self):
1157 self.assertEqual("/~jeremy/", self.get.get_selector())
1158 req = Request("http://www.python.org/")
1159 self.assertEqual("/", req.get_selector())
1160
1161 def test_get_type(self):
1162 self.assertEqual("http", self.get.get_type())
1163
1164 def test_get_host(self):
1165 self.assertEqual("www.python.org", self.get.get_host())
1166
1167 def test_get_host_unquote(self):
1168 req = Request("http://www.%70ython.org/")
1169 self.assertEqual("www.python.org", req.get_host())
1170
1171 def test_proxy(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001172 self.assertTrue(not self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001173 self.get.set_proxy("www.perl.org", "http")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001174 self.assertTrue(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001175 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1176 self.assertEqual("www.perl.org", self.get.get_host())
1177
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001178
def test_main(verbose=None):
    """Run the doctests of this module and urllib.request, then the unit tests.

    verbose is passed on to support.run_doctest().
    """
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001189
# Run the full suite in verbose mode when executed as a script.
if __name__ == "__main__":
    test_main(verbose=True)