blob: f1c478c2debc316bed0e59cb267c5654ed8d24c8 [file] [log] [blame]
import io
import os
import socket
import unittest
import urllib.error
import urllib.request
from test import support
from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
# XXX
# Request
# CacheFTPHandler (hard to write)
# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]
        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')

        file_url = "file://%s" % fname
        f = urllib.request.urlopen(file_url)
        # Close the response even if reading it fails; the data itself is
        # not inspected, the read only has to succeed.
        try:
            f.read()
        finally:
            f.close()

    def test_parse_http_list(self):
        # Each case pairs a raw header value with the list of items that
        # parse_http_list() is expected to split it into.
        cases = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"']),
        ]
        for string, expected in cases:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000047
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000048
def test_request_headers_dict():
    """
    Doctest for the undocumented Request.headers dictionary.

    The Request.headers dictionary is not a documented interface. It should
    stay that way, because the complete set of headers are only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers. It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
78
def test_request_headers_methods():
    """
    Doctest for Request.has_header(), .get_header() and .header_items().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility. (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
109
110
# NOTE(review): module-level function whose `self` parameter is never used;
# the body is only a docstring plus `pass`, so it is presumably collected by
# doctest rather than called -- confirm against the test runner.
def test_password_manager(self):
    """
    Doctest for HTTPPasswordMgr.add_password() / find_user_password().

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
174
175
# NOTE(review): module-level function whose `self` parameter is never used;
# the body is only a docstring, so it is presumably collected by doctest
# rather than called -- confirm against the test runner.
def test_password_manager_default_port(self):
    """
    Doctest for HTTPPasswordMgr handling of explicit and default ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme. This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
222
class MockOpener:
    """Stand-in opener that only remembers the arguments it was called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Store the request, payload and timeout of the latest open() call."""
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        """Store the protocol and the remaining arguments of an error() call."""
        self.proto = proto
        self.args = args
229
class MockFile:
    """File-like stub: every method accepts its arguments and does nothing."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
234
class MockHeaders(dict):
    """Header dict whose getheaders() ignores the header name it is given."""

    def getheaders(self, name):
        """Return all stored values as a new list, whatever *name* is."""
        return [*self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000238
class MockResponse(io.StringIO):
    """In-memory text response carrying a status code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object supplied at construction."""
        return self.headers

    def geturl(self):
        """Return the URL supplied at construction (None by default)."""
        return self.url
247
class MockCookieJar:
    """Cookie jar stub that records the objects passed to its two hooks."""

    def add_cookie_header(self, request):
        """Remember the request as ``ach_req``."""
        self.ach_req = request

    def extract_cookies(self, response, request):
        """Remember the request as ``ec_req`` and the response as ``ec_r``."""
        self.ec_req = request
        self.ec_r = response
253
class FakeMethod:
    """Callable that forwards to *handle* with a method name and action prepended."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        """Invoke the stored handle as handle(meth_name, action, *args)."""
        callback = self.handle
        return callback(self.meth_name, self.action, *args)
261
class MockHandler:
    """Configurable handler for exercising OpenerDirector's handler machinery.

    See the add_ordered_mock_handlers() docstring for the format of the
    *methods* argument.
    """
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of *methods*.

        A plain string names a method with no action; a ``(name, action)``
        pair names a method together with the action handle() carries out.
        """
        for spec in methods:
            # Decide by type rather than by len(spec) == 2, so that a
            # two-character method name is not unpacked into (name, action).
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # The attribute is set on the class, so it is visible on every
            # instance of that class; the FakeMethod still calls back into
            # the handle() of the instance that created it.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent opener, then carry out *action*.

        Recognized actions:
          None               -- return None
          "return self"      -- return this handler
          "return response"  -- return an empty 200 MockResponse
          "return request"   -- return a new Request for "http://blah/"
          "error ..."        -- call parent.error() with the text after the
                                last space as the code (an int if it parses)
          "raise"            -- raise urllib.error.URLError

        Any other action raises AssertionError.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                # Non-numeric code: pass the text through unchanged.
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib.error.URLError("blah")
        # Raised explicitly (not via ``assert False``) so the guard is still
        # active when Python runs with -O.
        raise AssertionError("unknown action: %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        """Attach to *parent* and reset its list of recorded calls."""
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        """Order handlers by handler_order."""
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order. Yuck.
            return True
        return self.handler_order < other.handler_order
305
def add_ordered_mock_handlers(opener, meth_spec):
    """Build MockHandlers from a spec and register them on an OpenerDirector.

    meth_spec is a list with one entry per handler; each entry is a list of
    strings and tuples naming the methods to define on that handler. eg:

     [["http_error", "ftp_open"], ["http_open"]]

    gives one handler with .http_error() and .ftp_open() and a second one
    with .http_open(). Methods named by a plain string just record their
    arguments and return None. A tuple makes the method perform some action
    as well (see MockHandler.handle()), eg:

     [["http_error"], [("http_open", "return request")]]

    gives one handler whose .http_error() simply returns None and another
    whose .http_open() returns a Request object.

    Returns the handlers in the order they were created.
    """
    created = []
    for offset, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # methods on the class rather than on the instance.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later entries get a larger handler_order than earlier ones.
        handler.handler_order += offset
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
336
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
342
class MockHTTPHandler(urllib.request.BaseHandler):
    """HTTP handler double, useful for testing redirections and auth.

    The first request is answered through the parent's error() machinery
    with the supplied code and headers; every later request gets a 200 OK.
    """

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        """Forget the recorded requests and go back to the first response."""
        self.requests = []
        self._count = 0

    def http_open(self, req):
        """Record a copy of *req* and produce the scripted response."""
        import copy
        import email
        import http.client

        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            # Second and later requests: plain 200 OK with empty headers.
            self.req = req
            empty = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", empty, "", req.get_full_url())

        # First request: report the configured status through the parent.
        self._count += 1
        reason = http.client.responses[self.code]
        parsed = email.message_from_string(self.headers)
        return self.parent.error(
            "http", req, MockFile(), self.code, reason, parsed)
368
class MockPasswordManager:
    """Password manager stub holding one credential and recording lookups."""

    def add_password(self, realm, uri, user, password):
        """Store the given realm, URI, user and password, replacing any earlier ones."""
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        """Record what was asked for and return the stored (user, password)."""
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
379
380
class OpenerDirectorTests(unittest.TestCase):
    """Tests for how OpenerDirector registers handlers and dispatches to them."""

    def test_add_non_handler(self):
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
        ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(urllib.error.URLError,
                              o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None. Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the length first;
        # otherwise missing calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
        ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            ["http_error_302"],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: a wildcard for the response
            # argument, whose identity the test does not care about.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # A real assertion method, not a bare ``assert``, so the check
        # still runs under -O.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Without this the loop below would pass vacuously if no processor
        # had been called at all.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            self.assertEqual((handler, name), calls[i])
            if i < 2:
                # *_request
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
532
533
def sanepathname2url(path):
    """Convert *path* with pathname2url(), trimming a leading "///" on Windows.

    When os.name is "nt" and the converted path starts with three slashes,
    the first two are dropped; otherwise the result is returned unchanged.
    """
    urlpath = urllib.request.pathname2url(path)
    on_windows = os.name == "nt"
    if on_windows and urlpath.startswith("///"):
        return urlpath[2:]
    # XXX don't ask me about the mac...
    return urlpath
540
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000541class HandlerTests(unittest.TestCase):
542
def test_ftp(self):
    # FTPHandler.ftp_open() is driven through a subclass whose connect_ftp()
    # records its arguments and hands back a fake wrapper, so no real FTP
    # connection is made.
    class MockFTPWrapper:
        def __init__(self, data):
            self.data = data

        def retrfile(self, filename, filetype):
            self.filename = filename
            self.filetype = filetype
            return io.StringIO(self.data), len(self.data)

    class NullFTPHandler(urllib.request.FTPHandler):
        def __init__(self, data):
            self.data = data

        def connect_ftp(self, user, passwd, host, port, dirs,
                        timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            self.user, self.passwd = user, passwd
            self.host, self.port = host, port
            self.dirs = dirs
            self.ftpwrapper = MockFTPWrapper(self.data)
            return self.ftpwrapper

    import ftplib
    data = "rheum rhaponicum"
    h = NullFTPHandler(data)
    h.parent = MockOpener()

    # (url, host, port, transfer type, directories, file name, MIME type)
    cases = [
        ("ftp://localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://localhost:80/foo/bar/",
         "localhost", 80, "D",
         ["foo", "bar"], "", None),
        ("ftp://localhost/baz.gif;type=a",
         "localhost", ftplib.FTP_PORT, "A",
         [], "baz.gif", None),  # XXX really this should guess image/gif
    ]
    for url, host, port, type_, dirs, filename, mimetype in cases:
        req = Request(url)
        req.timeout = None
        r = h.ftp_open(req)
        # ftp authentication not yet implemented by FTPHandler
        credentials_blank = (h.user == h.passwd == "")
        self.assertTrue(credentials_blank)
        self.assertEqual(h.host, socket.gethostbyname(host))
        self.assertEqual(h.port, port)
        self.assertEqual(h.dirs, dirs)
        wrapper = h.ftpwrapper
        self.assertEqual(wrapper.filename, filename)
        self.assertEqual(wrapper.filetype, type_)
        headers = r.info()
        self.assertEqual(headers.get("Content-type"), mimetype)
        self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000589
def test_file(self):
    # FileHandler.file_open() must serve local files, refuse URLs that
    # carry a port or a foreign host, and hand "file://host//path" URLs on
    # to the parent opener as ftp requests.
    import email.utils
    h = urllib.request.FileHandler()
    h.parent = MockOpener()

    TESTFN = support.TESTFN
    urlpath = sanepathname2url(os.path.abspath(TESTFN))
    towrite = b"hello, world\n"
    urls = [
        "file://localhost%s" % urlpath,
        "file://%s" % urlpath,
        "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
    ]
    try:
        localaddr = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        localaddr = ''
    if localaddr:
        urls.append("file://%s%s" % (localaddr, urlpath))

    for url in urls:
        with open(TESTFN, "wb") as f:
            f.write(towrite)
        try:
            r = h.file_open(Request(url))
            try:
                data = r.read()
                headers = r.info()
                r.geturl()  # result unused; the call just must not raise
            finally:
                r.close()
            stats = os.stat(TESTFN)
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        finally:
            os.remove(TESTFN)
        self.assertEqual(data, towrite)
        self.assertEqual(headers["Content-type"], "text/plain")
        self.assertEqual(headers["Content-length"], "13")
        self.assertEqual(headers["Last-modified"], modified)

    for url in [
        "file://localhost:80%s" % urlpath,
        "file:///file_does_not_exist.txt",
        "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                               os.getcwd(), TESTFN),
        "file://somerandomhost.ontheinternet.com%s/%s" %
        (os.getcwd(), TESTFN),
    ]:
        # Create the file before entering the try block: if creation
        # fails there is nothing to remove, and the cleanup must not mask
        # the original error.
        with open(TESTFN, "wb") as f:
            f.write(towrite)
        try:
            self.assertRaises(urllib.error.URLError,
                              h.file_open, Request(url))
        finally:
            os.remove(TESTFN)

    h = urllib.request.FileHandler()
    o = h.parent = MockOpener()
    # XXXX why does // mean ftp (and /// mean not ftp!), and where
    #  is file: scheme specified? I think this is really a bug, and
    #  what was intended was to distinguish between URLs like:
    # file:/blah.txt (a file)
    # file://localhost/blah.txt (a file)
    # file:///blah.txt (a file)
    # file://ftp.example.com/blah.txt (an ftp URL)
    for url, ftp in [
        ("file://ftp.example.com//foo.txt", True),
        ("file://ftp.example.com///foo.txt", False),
        # XXXX bug: fails with OSError, should be URLError
        ("file://ftp.example.com/foo.txt", False),
    ]:
        req = Request(url)
        try:
            h.file_open(req)
        # XXXX remove OSError when bug fixed
        except (urllib.error.URLError, OSError):
            self.assertFalse(ftp)
        else:
            # The request was forwarded to the parent opener as ftp.
            self.assertIs(o.req, req)
            self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000678
679 def test_http(self):
Guido van Rossum700bd922007-08-27 18:10:06 +0000680 class MockHTTPResponse(io.IOBase):
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000681 def __init__(self, fp, msg, status, reason):
682 self.fp = fp
683 self.msg = msg
684 self.status = status
685 self.reason = reason
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000686 self.code = 200
Jeremy Hylton5d9c3032004-08-07 17:40:50 +0000687 def read(self):
688 return ''
Jeremy Hylton6c5e28c2009-03-31 14:35:53 +0000689 def info(self):
690 return {}
691 def geturl(self):
692 return self.url
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000693 class MockHTTPClass:
694 def __init__(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000695 self.level = 0
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000696 self.req_headers = []
697 self.data = None
698 self.raise_on_endheaders = False
Georg Brandlf78e02b2008-06-10 17:40:04 +0000699 def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000700 self.host = host
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000701 self.timeout = timeout
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000702 return self
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000703 def set_debuglevel(self, level):
704 self.level = level
705 def request(self, method, url, body=None, headers={}):
706 self.method = method
707 self.selector = url
708 self.req_headers += headers.items()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000709 self.req_headers.sort()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000710 if body:
711 self.data = body
712 if self.raise_on_endheaders:
713 import socket
714 raise socket.error()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000715 def getresponse(self):
716 return MockHTTPResponse(MockFile(), {}, 200, "OK")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000717
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000718 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000719 o = h.parent = MockOpener()
720
721 url = "http://example.com/"
722 for method, data in [("GET", None), ("POST", "blah")]:
723 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000724 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000725 req.add_unredirected_header("Spam", "eggs")
726 http = MockHTTPClass()
727 r = h.do_open(http, req)
728
729 # result attributes
730 r.read; r.readline # wrapped MockFile methods
731 r.info; r.geturl # addinfourl methods
732 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
733 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000734 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000735 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000736
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000737 self.assertEqual(http.host, "example.com")
738 self.assertEqual(http.level, 0)
739 self.assertEqual(http.method, method)
740 self.assertEqual(http.selector, "/")
741 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000742 [("Connection", "close"),
743 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000744 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000745
746 # check socket.error converted to URLError
747 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000748 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000749
750 # check adding of standard headers
751 o.addheaders = [("Spam", "eggs")]
752 for data in "", None: # POST, GET
753 req = Request("http://example.com/", data)
754 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000755 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000756 if data is None: # GET
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000757 self.assertTrue("Content-length" not in req.unredirected_hdrs)
758 self.assertTrue("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000759 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000760 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
761 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000762 "application/x-www-form-urlencoded")
763 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000764 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
765 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000766
767 # don't clobber existing headers
768 req.add_unredirected_header("Content-length", "foo")
769 req.add_unredirected_header("Content-type", "bar")
770 req.add_unredirected_header("Host", "baz")
771 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000772 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000773 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
774 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000775 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
776 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000777
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000778 def test_http_doubleslash(self):
779 # Checks the presence of any unnecessary double slash in url does not
780 # break anything. Previously, a double slash directly after the host
781 # could could cause incorrect parsing.
782 h = urllib.request.AbstractHTTPHandler()
783 o = h.parent = MockOpener()
784
785 data = ""
786 ds_urls = [
787 "http://example.com/foo/bar/baz.html",
788 "http://example.com//foo/bar/baz.html",
789 "http://example.com/foo//bar/baz.html",
790 "http://example.com/foo/bar//baz.html"
791 ]
792
793 for ds_url in ds_urls:
794 ds_req = Request(ds_url, data)
795
796 # Check whether host is determined correctly if there is no proxy
797 np_ds_req = h.do_request_(ds_req)
798 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
799
800 # Check whether host is determined correctly if there is a proxy
801 ds_req.set_proxy("someproxy:3128",None)
802 p_ds_req = h.do_request_(ds_req)
803 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
804
805
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000806 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000807 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000808 o = h.parent = MockOpener()
809
810 url = "http://example.com/"
811 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000812 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000813 r = MockResponse(200, "OK", {}, "", url)
814 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000815 self.assertTrue(r is newr)
816 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000817 r = MockResponse(202, "Accepted", {}, "", url)
818 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000819 self.assertTrue(r is newr)
820 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000821 r = MockResponse(206, "Partial content", {}, "", url)
822 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000823 self.assertTrue(r is newr)
824 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000825 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000826 r = MockResponse(502, "Bad gateway", {}, "", url)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000827 self.assertTrue(h.http_response(req, r) is None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000828 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000829 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000830
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000831 def test_cookies(self):
832 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000833 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000834 o = h.parent = MockOpener()
835
836 req = Request("http://example.com/")
837 r = MockResponse(200, "OK", {}, "")
838 newreq = h.http_request(req)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000839 self.assertTrue(cj.ach_req is req is newreq)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000840 self.assertEquals(req.get_origin_req_host(), "example.com")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000841 self.assertTrue(not req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000842 newr = h.http_response(req, r)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000843 self.assertTrue(cj.ec_req is req)
844 self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000845
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000846 def test_redirect(self):
847 from_url = "http://example.com/a.html"
848 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000849 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000850 o = h.parent = MockOpener()
851
852 # ordinary redirect behaviour
853 for code in 301, 302, 303, 307:
854 for data in None, "blah\nblah\n":
855 method = getattr(h, "http_error_%s" % code)
856 req = Request(from_url, data)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000857 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000858 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +0000859 if data is not None:
860 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000861 req.add_unredirected_header("Spam", "spam")
862 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000863 method(req, MockFile(), code, "Blah",
864 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000865 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000866 # 307 in response to POST requires user OK
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000867 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000868 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000869 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000870 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000871 except AttributeError:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000872 self.assertTrue(not o.req.has_data())
Christian Heimes77c02eb2008-02-09 02:18:51 +0000873
874 # now it's a GET, there should not be headers regarding content
875 # (possibly dragged from before being a POST)
876 headers = [x.lower() for x in o.req.headers]
877 self.assertTrue("content-length" not in headers)
878 self.assertTrue("content-type" not in headers)
879
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000880 self.assertEqual(o.req.headers["Nonsense"],
881 "viking=withhold")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000882 self.assertTrue("Spam" not in o.req.headers)
883 self.assertTrue("Spam" not in o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000884
885 # loop detection
886 req = Request(from_url)
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000887 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000888 def redirect(h, req, url=to_url):
889 h.http_error_302(req, MockFile(), 302, "Blah",
890 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000891 # Note that the *original* request shares the same record of
892 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000893
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000894 # detect infinite loop redirect of a URL to itself
895 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000896 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000897 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000898 try:
899 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000900 redirect(h, req, "http://example.com/")
901 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000902 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000903 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000904 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000905
906 # detect endless non-repeating chain of redirects
907 req = Request(from_url, origin_req_host="example.com")
908 count = 0
Senthil Kumaranfb8cc2f2009-07-19 02:44:19 +0000909 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000910 try:
911 while 1:
912 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000913 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000914 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000915 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000916 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000917
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000918 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000919 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +0000920 from http.cookiejar import CookieJar
921 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000922
923 cj = CookieJar()
924 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000925 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000926 hdeh = urllib.request.HTTPDefaultErrorHandler()
927 hrh = urllib.request.HTTPRedirectHandler()
928 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000929 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000930 o.open("http://www.example.com/")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000931 self.assertTrue(not hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000932
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000933 def test_proxy(self):
934 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000935 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000936 o.add_handler(ph)
937 meth_spec = [
938 [("http_open", "return response")]
939 ]
940 handlers = add_ordered_mock_handlers(o, meth_spec)
941
942 req = Request("http://acme.example.com/")
943 self.assertEqual(req.get_host(), "acme.example.com")
944 r = o.open(req)
945 self.assertEqual(req.get_host(), "proxy.example.com:3128")
946
947 self.assertEqual([(handlers[0], "http_open")],
948 [tup[0:2] for tup in o.calls])
949
Senthil Kumaran7bb04972009-10-11 04:58:55 +0000950 def test_proxy_no_proxy(self):
951 os.environ['no_proxy'] = 'python.org'
952 o = OpenerDirector()
953 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
954 o.add_handler(ph)
955 req = Request("http://www.perl.org/")
956 self.assertEqual(req.get_host(), "www.perl.org")
957 r = o.open(req)
958 self.assertEqual(req.get_host(), "proxy.example.com")
959 req = Request("http://www.python.org")
960 self.assertEqual(req.get_host(), "www.python.org")
961 r = o.open(req)
962 self.assertEqual(req.get_host(), "www.python.org")
963 del os.environ['no_proxy']
964
965
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +0000966 def test_proxy_https(self):
967 o = OpenerDirector()
968 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
969 o.add_handler(ph)
970 meth_spec = [
971 [("https_open", "return response")]
972 ]
973 handlers = add_ordered_mock_handlers(o, meth_spec)
974
975 req = Request("https://www.example.com/")
976 self.assertEqual(req.get_host(), "www.example.com")
977 r = o.open(req)
978 self.assertEqual(req.get_host(), "proxy.example.com:3128")
979 self.assertEqual([(handlers[0], "https_open")],
980 [tup[0:2] for tup in o.calls])
981
982
Christian Heimes4fbc72b2008-03-22 00:47:35 +0000983 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000984 opener = OpenerDirector()
985 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000986 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000987 realm = "ACME Widget Store"
988 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +0000989 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
990 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000991 opener.add_handler(auth_handler)
992 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000993 self._test_basic_auth(opener, auth_handler, "Authorization",
994 realm, http_handler, password_manager,
995 "http://acme.example.com/protected",
996 "http://acme.example.com/protected",
997 )
998
Christian Heimes4fbc72b2008-03-22 00:47:35 +0000999 def test_basic_auth_with_single_quoted_realm(self):
1000 self.test_basic_auth(quote_char="'")
1001
Thomas Wouters477c8d52006-05-27 19:21:47 +00001002 def test_proxy_basic_auth(self):
1003 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001004 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001005 opener.add_handler(ph)
1006 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001007 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001008 realm = "ACME Networks"
1009 http_handler = MockHTTPHandler(
1010 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001011 opener.add_handler(auth_handler)
1012 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001013 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001014 realm, http_handler, password_manager,
1015 "http://acme.example.com:3128/protected",
1016 "proxy.example.com:3128",
1017 )
1018
1019 def test_basic_and_digest_auth_handlers(self):
1020 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1021 # response (http://python.org/sf/1479302), where it should instead
1022 # return None to allow another handler (especially
1023 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001024
1025 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1026 # try digest first (since it's the strongest auth scheme), so we record
1027 # order of calls here to check digest comes first:
1028 class RecordingOpenerDirector(OpenerDirector):
1029 def __init__(self):
1030 OpenerDirector.__init__(self)
1031 self.recorded = []
1032 def record(self, info):
1033 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001034 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001035 def http_error_401(self, *args, **kwds):
1036 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001037 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001038 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001039 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001040 def http_error_401(self, *args, **kwds):
1041 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001042 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001043 *args, **kwds)
1044
1045 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001046 password_manager = MockPasswordManager()
1047 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001048 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001049 realm = "ACME Networks"
1050 http_handler = MockHTTPHandler(
1051 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001052 opener.add_handler(basic_handler)
1053 opener.add_handler(digest_handler)
1054 opener.add_handler(http_handler)
1055
1056 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057 self._test_basic_auth(opener, basic_handler, "Authorization",
1058 realm, http_handler, password_manager,
1059 "http://acme.example.com/protected",
1060 "http://acme.example.com/protected",
1061 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001062 # check digest was tried before basic (twice, because
1063 # _test_basic_auth called .open() twice)
1064 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001065
1066 def _test_basic_auth(self, opener, auth_handler, auth_header,
1067 realm, http_handler, password_manager,
1068 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001069 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001070 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001071
1072 # .add_password() fed through to password manager
1073 auth_handler.add_password(realm, request_url, user, password)
1074 self.assertEqual(realm, password_manager.realm)
1075 self.assertEqual(request_url, password_manager.url)
1076 self.assertEqual(user, password_manager.user)
1077 self.assertEqual(password, password_manager.password)
1078
1079 r = opener.open(request_url)
1080
1081 # should have asked the password manager for the username/password
1082 self.assertEqual(password_manager.target_realm, realm)
1083 self.assertEqual(password_manager.target_url, protected_url)
1084
1085 # expect one request without authorization, then one with
1086 self.assertEqual(len(http_handler.requests), 2)
1087 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001088 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001089 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001090 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001091 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1092 auth_hdr_value)
1093
1094 # if the password manager can't find a password, the handler won't
1095 # handle the HTTP auth error
1096 password_manager.user = password_manager.password = None
1097 http_handler.reset()
1098 r = opener.open(request_url)
1099 self.assertEqual(len(http_handler.requests), 1)
1100 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1101
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001102
class MiscTests(unittest.TestCase):
    # Tests for urllib.request.build_opener().

    def test_build_opener(self):
        # build_opener() accepts handler classes and instances, and handlers
        # passed in (including subclasses of default handlers) end up in the
        # resulting opener.
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass
        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        # Fail unless some handler in opener.handlers is an instance of
        # exactly handler_class (a subclass instance does not count).
        found = [h.__class__ for h in opener.handlers]
        if not any(cls == handler_class for cls in found):
            # Name the missing class and what is installed, so the failure
            # can be diagnosed from the message alone.
            self.fail("opener has no %s handler; installed handlers: %s" %
                      (handler_class.__name__,
                       ", ".join(cls.__name__ for cls in found)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001148
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001149class RequestTests(unittest.TestCase):
1150
1151 def setUp(self):
1152 self.get = Request("http://www.python.org/~jeremy/")
1153 self.post = Request("http://www.python.org/~jeremy/",
1154 "data",
1155 headers={"X-Test": "test"})
1156
1157 def test_method(self):
1158 self.assertEqual("POST", self.post.get_method())
1159 self.assertEqual("GET", self.get.get_method())
1160
1161 def test_add_data(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001162 self.assertTrue(not self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001163 self.assertEqual("GET", self.get.get_method())
1164 self.get.add_data("spam")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001165 self.assertTrue(self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001166 self.assertEqual("POST", self.get.get_method())
1167
1168 def test_get_full_url(self):
1169 self.assertEqual("http://www.python.org/~jeremy/",
1170 self.get.get_full_url())
1171
1172 def test_selector(self):
1173 self.assertEqual("/~jeremy/", self.get.get_selector())
1174 req = Request("http://www.python.org/")
1175 self.assertEqual("/", req.get_selector())
1176
1177 def test_get_type(self):
1178 self.assertEqual("http", self.get.get_type())
1179
1180 def test_get_host(self):
1181 self.assertEqual("www.python.org", self.get.get_host())
1182
1183 def test_get_host_unquote(self):
1184 req = Request("http://www.%70ython.org/")
1185 self.assertEqual("www.python.org", req.get_host())
1186
1187 def test_proxy(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001188 self.assertTrue(not self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001189 self.get.set_proxy("www.perl.org", "http")
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001190 self.assertTrue(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001191 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1192 self.assertEqual("www.perl.org", self.get.get_host())
1193
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001194
def test_main(verbose=None):
    # Run the doctests of this test module and of urllib.request, then all
    # the unittest classes listed below.
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )


if __name__ == "__main__":
    test_main(verbose=True)