blob: 1a8ffac1b03df4b5f17f170cc145f58455595fa6 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        """urlopen() rejects a malformed URL and can read a local file: URL."""
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        # Close the response even when read() raises, so the test never
        # leaks an open file handle.
        try:
            f.read()
        finally:
            f.close()

    def test_parse_http_list(self):
        """parse_http_list() splits on commas outside quoted strings."""
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12); "expected" avoids shadowing the builtin list.
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000045
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000046
def test_request_headers_dict():
    """
    Doctests for the Request.headers dictionary.

    Request.headers is not a documented interface, and it should remain
    undocumented: the full set of headers can only be reached through
    .get_header(), .has_header() and .header_items().  The dictionary is older
    than those methods, though, so real code will be using it.

    Adding those methods in 2.4 was a mistake for the same reason: code that
    used to see every (urllib2 user)-provided header in .headers now sees only
    a subset (and the function interface is ugly and incomplete).  It would
    have been better to replace the .headers dict with a dict subclass (or
    UserDict.DictMixin instance?) that kept the .headers interface and also
    exposed the "unredirected" headers.  It's probably too late to fix that,
    though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
76
def test_request_headers_methods():
    """
    Doctests for the header accessor methods of Request.

    Header names are normalized to .capitalize()-case here.  Keep it that way
    for backwards-compatibility.  (For HTTP, urllib2 normalizes to
    .title()-case before handing the headers to http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    At present r.has_header("spam-EggS") is False and
    r.get_header("spam-EggS") returns None, although that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
107
108
def test_password_manager(self=None):
    """
    Doctests for HTTPPasswordMgr lookups by realm and URI.

    This is a module-level doctest holder, not a method; the ``self``
    parameter is kept only for backward compatibility and is ignored.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
172
173
def test_password_manager_default_port(self=None):
    """
    Doctests for HTTPPasswordMgr handling of default ports.

    This is a module-level doctest holder, not a method; the ``self``
    parameter is kept only for backward compatibility and is ignored.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
220
class MockOpener:
    """Opener stand-in that just remembers the arguments it was called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the call arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Keep the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
227
class MockFile:
    """File-like stub: every method accepts the usual arguments and does nothing."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
232
class MockHeaders(dict):
    """dict with a getheaders() method for code that expects a headers object."""

    def getheaders(self, name):
        # The requested name is ignored: every stored value is returned.
        return [*self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000236
class MockResponse(io.StringIO):
    """In-memory text stream carrying a status code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL passed to the constructor (None by default)."""
        return self.url
245
class MockCookieJar:
    """Cookie jar stub that records the objects handed to it."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
251
class FakeMethod:
    """Callable that forwards each call, tagged with a name and action, to a handler."""

    def __init__(self, meth_name, action, handle):
        self.meth_name, self.action, self.handle = meth_name, action, handle

    def __call__(self, *args):
        # Prepend the stored name and action to the caller's arguments.
        forwarded = (self.meth_name, self.action) + args
        return self.handle(*forwarded)
259
class MockHTTPResponse(io.IOBase):
    """Canned HTTP response object with an empty body and no headers."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason
        # The code is fixed at 200 whatever status was passed in.
        self.code = 200

    def read(self):
        return ''

    def info(self):
        return dict()

    def geturl(self):
        # url is not set by __init__; it has to be assigned by the caller
        # before this method is used.
        return self.url
276
277
class MockHTTPClass:
    """Mock HTTP connection that records what is sent through it.

    An instance acts as its own factory: calling it stores the host and
    timeout and returns the same instance.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host, self.timeout = host, timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host, self._tunnel_port = host, port
        if not headers:
            # No (or empty) headers: empty the current mapping in place.
            self._tunnel_headers.clear()
        else:
            self._tunnel_headers = headers

    def request(self, method, url, body=None, headers=None):
        self.method, self.selector = method, url
        if headers is not None:
            self.req_headers.extend(headers.items())
        # Keep the recorded headers sorted so tests can compare them to a
        # fixed list.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
315
class MockHandler:
    """Handler whose methods are generated from a spec and log their calls.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        for spec in methods:
            if len(spec) == 2:
                name, action = spec
            else:
                name, action = spec, None
            # The fake method is attached to the class, not the instance.
            setattr(self.__class__, name,
                    FakeMethod(name, action, self.handle))

    def handle(self, fn_name, action, *args, **kwds):
        # Log the call on the parent before carrying out the action.
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        if action == "return self":
            return self
        if action == "return response":
            return MockResponse(200, "OK", {}, "")
        if action == "return request":
            return Request("http://blah/")
        if action.startswith("error"):
            # The error code is whatever follows the last space; it stays a
            # string when it is not numeric.
            code = action.rpartition(" ")[2]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        if action == "raise":
            raise urllib.error.URLError("blah")
        assert False

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        self.parent.calls = []

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # No handler_order, leave in original order.  Yuck.
        return True
359
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and add them to an OpenerDirector.

    meth_spec: list of lists of tuples and strings; each inner list describes
    the methods to define on one handler.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    gives one handler with .http_error() and .ftp_open() and a second handler
    with .http_open().  Methods given as plain strings just record their
    arguments and return None.  A tuple instead of a string makes the method
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    gives one handler whose .http_error() simply returns None, and another
    whose .http_open() returns a Request object.

    Returns the list of handlers, in the order of meth_spec.

    """
    handlers = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler, because MockHandler installs its
        # generated methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later entries get a higher handler_order than earlier ones.
        handler.handler_order += position
        handler.add_parent(opener)
        handlers.append(handler)
        opener.add_handler(handler)
    return handlers
390
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with each given handler added, in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
396
class MockHTTPHandler(urllib.request.BaseHandler):
    """Handler for testing redirections and auth.

    The first request is answered with the supplied code and headers (via the
    parent's error handling); any later request gets a 200 OK.
    """

    def __init__(self, code, headers):
        self.code, self.headers = code, headers
        self.reset()

    def reset(self):
        """Forget recorded requests and go back to the first-response state."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import email
        import http.client

        # Store a copy of the request as it looked when it arrived.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())

        self._count += 1
        name = http.client.responses[self.code]
        msg = email.message_from_string(self.headers)
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
422
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    """HTTPS handler that sends every request through one MockHTTPClass.

    Useful for testing the Proxy-Authorization request by verifying the
    properties of httpconn.
    """

    def __init__(self):
        super().__init__()
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
433
class MockPasswordManager:
    """Password manager stub holding one credential and logging each lookup."""

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for, then return the stored credential
        # whatever the realm or URI.
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
444
445
class OpenerDirectorTests(unittest.TestCase):
    """Tests for how OpenerDirector dispatches to its handlers."""

    def test_add_non_handler(self):
        """add_handler() raises TypeError for an object that is not a handler."""
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        """Methods that merely look like handler methods are not dispatched to."""
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib.error import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        """A handler returning non-None stops the remaining handlers."""
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        """Handlers are called in handler_order order, not insertion order."""
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler): pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        """A URLError raised by a handler propagates out of open()."""
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

##     def test_error(self):
##         # XXX this doesn't actually seem to be used in standard library,
##         #  but should really be tested anyway...

    def test_http_error(self):
        """An HTTP error is routed to the matching http_error_NNN handler."""
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything: a wildcard for the response argument.
            def __eq__(self, other): return True

        req = Request("http://example.com/")
        o.open(req)
        # assertEqual rather than a bare assert: it still runs under
        # "python -O" and reports both values on failure.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        """*_request and *_response run on every handler that defines them."""
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        for i, (handler, name, args, kwds) in enumerate(o.calls):
            if i < 2:
                # *_request
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 1)
                self.assertIsInstance(args[0], Request)
            else:
                # *_response
                self.assertEqual((handler, name), calls[i])
                self.assertEqual(len(args), 2)
                self.assertIsInstance(args[0], Request)
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
597
598
def sanepathname2url(path):
    """Convert a filesystem path with pathname2url(), tidying the Windows form.

    On Windows ("nt"), a result that starts with "///" has its first two
    characters removed; otherwise the pathname2url() result is returned as is.
    """
    converted = urllib.request.pathname2url(path)
    needs_trim = os.name == "nt" and converted.startswith("///")
    # XXX don't ask me about the mac...
    return converted[2:] if needs_trim else converted
605
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000606class HandlerTests(unittest.TestCase):
607
608 def test_ftp(self):
609 class MockFTPWrapper:
610 def __init__(self, data): self.data = data
611 def retrfile(self, filename, filetype):
612 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000613 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000614
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000615 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000617 def connect_ftp(self, user, passwd, host, port, dirs,
618 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000619 self.user, self.passwd = user, passwd
620 self.host, self.port = host, port
621 self.dirs = dirs
622 self.ftpwrapper = MockFTPWrapper(self.data)
623 return self.ftpwrapper
624
Georg Brandlf78e02b2008-06-10 17:40:04 +0000625 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000626 data = "rheum rhaponicum"
627 h = NullFTPHandler(data)
628 o = h.parent = MockOpener()
629
630 for url, host, port, type_, dirs, filename, mimetype in [
631 ("ftp://localhost/foo/bar/baz.html",
632 "localhost", ftplib.FTP_PORT, "I",
633 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000634 ("ftp://localhost:80/foo/bar/",
635 "localhost", 80, "D",
636 ["foo", "bar"], "", None),
637 ("ftp://localhost/baz.gif;type=a",
638 "localhost", ftplib.FTP_PORT, "A",
639 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000640 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000641 req = Request(url)
642 req.timeout = None
643 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000644 # ftp authentication not yet implemented by FTPHandler
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000645 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000646 self.assertEqual(h.host, socket.gethostbyname(host))
647 self.assertEqual(h.port, port)
648 self.assertEqual(h.dirs, dirs)
649 self.assertEqual(h.ftpwrapper.filename, filename)
650 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000652 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000653 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000654
655 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000656 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000657 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000658 o = h.parent = MockOpener()
659
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000660 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000661 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000662 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000663 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000664 "file://localhost%s" % urlpath,
665 "file://%s" % urlpath,
666 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000667 ]
668 try:
669 localaddr = socket.gethostbyname(socket.gethostname())
670 except socket.gaierror:
671 localaddr = ''
672 if localaddr:
673 urls.append("file://%s%s" % (localaddr, urlpath))
674
675 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000676 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000677 try:
678 try:
679 f.write(towrite)
680 finally:
681 f.close()
682
683 r = h.file_open(Request(url))
684 try:
685 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000686 headers = r.info()
687 newurl = r.geturl()
688 finally:
689 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000690 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000691 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000692 finally:
693 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000694 self.assertEqual(data, towrite)
695 self.assertEqual(headers["Content-type"], "text/plain")
696 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000697 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000698
699 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000700 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000701 "file:///file_does_not_exist.txt",
702 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
703 os.getcwd(), TESTFN),
704 "file://somerandomhost.ontheinternet.com%s/%s" %
705 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000706 ]:
707 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000708 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000709 try:
710 f.write(towrite)
711 finally:
712 f.close()
713
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000714 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000715 h.file_open, Request(url))
716 finally:
717 os.remove(TESTFN)
718
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000719 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000720 o = h.parent = MockOpener()
721 # XXXX why does // mean ftp (and /// mean not ftp!), and where
722 # is file: scheme specified? I think this is really a bug, and
723 # what was intended was to distinguish between URLs like:
724 # file:/blah.txt (a file)
725 # file://localhost/blah.txt (a file)
726 # file:///blah.txt (a file)
727 # file://ftp.example.com/blah.txt (an ftp URL)
728 for url, ftp in [
729 ("file://ftp.example.com//foo.txt", True),
730 ("file://ftp.example.com///foo.txt", False),
731# XXXX bug: fails with OSError, should be URLError
732 ("file://ftp.example.com/foo.txt", False),
733 ]:
734 req = Request(url)
735 try:
736 h.file_open(req)
737 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000738 except (urllib.error.URLError, OSError):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000739 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000740 else:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000741 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000742 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000743
def test_http(self):
    # Exercises AbstractHTTPHandler.do_open() and do_request_() against the
    # mock connection class and mock opener defined in this module.
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", "blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes: bare attribute access raises AttributeError
        # if the response object lacks any of them
        r.read; r.readline  # wrapped MockFile methods
        r.info; r.geturl  # addinfourl methods
        # These used to be written as the tuple expression
        # ``r.code, r.msg == 200, "OK"``, which compared nothing.
        self.assertEqual(r.code, 200)
        self.assertEqual(r.msg, "OK")
        hdrs = r.info()
        hdrs.get; hdrs.__contains__  # r.info() gives a dict-like object
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check socket.error converted to URLError
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in "", None:  # POST, GET
        req = Request("http://example.com/", data)
        h.do_request_(req)
        if data is None:  # GET
            self.assertNotIn("Content-length", req.unredirected_hdrs)
            self.assertNotIn("Content-type", req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000805
def test_http_doubleslash(self):
    # A redundant double slash anywhere in the URL path must not break
    # anything.  Previously, a double slash directly after the host
    # could cause incorrect parsing.
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    payload = ""
    candidate_urls = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for candidate in candidate_urls:
        request = Request(candidate, payload)

        # Host header must be right when no proxy is configured ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and must still name the origin host once a proxy is set.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
832
833
def test_errors(self):
    # HTTPErrorProcessor hands 2xx responses back untouched and routes
    # everything else through the parent opener's error() machinery.
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # all 2xx are passed through
    for status, reason in ((200, "OK"),
                           (202, "Accepted"),
                           (206, "Partial content")):
        response = MockResponse(status, reason, {}, "", url)
        returned = processor.http_response(req, response)
        self.assertIs(response, returned)
        self.assertFalse(hasattr(opener, "proto"))  # opener.error not called

    # anything else calls opener.error (and MockOpener returns None, here)
    bad_gateway = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(req, bad_gateway))
    self.assertEqual(opener.proto, "http")  # opener.error called
    self.assertEqual(opener.args,
                     (req, bad_gateway, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000858
def test_cookies(self):
    # HTTPCookieProcessor must pass the very same request/response objects
    # through to the cookie jar and hand them back unchanged.
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")

    newreq = h.http_request(req)
    # The jar saw the request, and the processor returned it as-is.
    self.assertIs(cj.ach_req, req)
    self.assertIs(newreq, req)
    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertFalse(req.is_unverifiable())

    newr = h.http_response(req, r)
    # The jar was given this request/response pair, and the response came
    # back as the same object.
    self.assertIs(cj.ec_req, req)
    self.assertIs(cj.ec_r, r)
    self.assertIs(newr, r)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000873
def test_redirect(self):
    # Covers HTTPRedirectHandler: ordinary redirects for each supported
    # status code, then detection of redirect loops and endless chains.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            method = getattr(h, "http_error_%s" % code)
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                method(req, MockFile(), code, "Blah",
                       MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(o.req.get_full_url(), to_url)
            try:
                self.assertEqual(o.req.get_method(), "GET")
            except AttributeError:
                self.assertFalse(o.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            headers = [x.lower() for x in o.req.headers]
            self.assertNotIn("content-length", headers)
            self.assertNotIn("content-type", headers)

            self.assertEqual(o.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", o.req.headers)
            self.assertNotIn("Spam", o.req.unredirected_hdrs)

    # loop detection
    def redirect(h, req, url=to_url):
        h.http_error_302(req, MockFile(), 302, "Blah",
                         MockHeaders({"location": url}))

    def redirects_until_error(make_url, limit):
        # Keep redirecting one request until the handler raises HTTPError
        # and return how many redirects succeeded before that.  The loop
        # is bounded so that a regression in loop detection fails the
        # test instead of spinning forever.
        #
        # Note that the *original* request shares the same record of
        # redirections with the sub-requests caused by the redirections.
        req = Request(from_url, origin_req_host="example.com")
        req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
        count = 0
        try:
            while count <= limit:
                redirect(h, req, make_url(count))
                count += 1
        except urllib.error.HTTPError:
            return count
        self.fail("no HTTPError raised after %d redirects" % count)

    handler_class = urllib.request.HTTPRedirectHandler

    # detect infinite loop redirect of a URL to itself
    # (don't stop until max_repeats, because cookies may introduce state)
    self.assertEqual(
        redirects_until_error(lambda n: "http://example.com/",
                              handler_class.max_repeats),
        handler_class.max_repeats)

    # detect endless non-repeating chain of redirects
    self.assertEqual(
        redirects_until_error(lambda n: "http://example.com/%d" % n,
                              handler_class.max_redirections),
        handler_class.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000945
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # The mock HTTP handler answers with a redirect to a different site.
    redirecting_http = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting_http,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # The last request the mock handler recorded must carry no cookie.
    self.assertFalse(redirecting_http.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000960
def test_proxy(self):
    # An http request opened through a ProxyHandler is re-targeted at the
    # proxy and then dispatched to the http_open handler.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(http="proxy.example.com:3128"))
    opener.add_handler(proxy_handler)
    mock_handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    request = Request("http://acme.example.com/")
    self.assertEqual(request.get_host(), "acme.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(mock_handlers[0], "http_open")], recorded)
977
def test_proxy_no_proxy(self):
    # Hosts matching the no_proxy environment variable bypass the proxy.
    #
    # Remember any pre-existing value and restore it in a finally block,
    # so a failing assertion cannot leak the setting into later tests and
    # a value set by the caller is not silently deleted.
    previous = os.environ.get('no_proxy')
    os.environ['no_proxy'] = 'python.org'
    try:
        o = OpenerDirector()
        ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
        o.add_handler(ph)

        # Not covered by no_proxy: the request is redirected to the proxy.
        req = Request("http://www.perl.org/")
        self.assertEqual(req.get_host(), "www.perl.org")
        o.open(req)
        self.assertEqual(req.get_host(), "proxy.example.com")

        # Covered by no_proxy: the request keeps its original host.
        req = Request("http://www.python.org")
        self.assertEqual(req.get_host(), "www.python.org")
        o.open(req)
        self.assertEqual(req.get_host(), "www.python.org")
    finally:
        if previous is None:
            os.environ.pop('no_proxy', None)
        else:
            os.environ['no_proxy'] = previous
992
993
def test_proxy_https(self):
    # Same as the plain-http proxy test, but for the https scheme.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(https="proxy.example.com:3128"))
    opener.add_handler(proxy_handler)
    mock_handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    opener.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(mock_handlers[0], "https_open")], recorded)
1009
def test_proxy_https_proxy_authorization(self):
    # Proxy-Authorization stays on the request object but is kept out of
    # the headers sent over the mock https connection.
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler(dict(https='proxy.example.com:3128')))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    request = Request("https://www.example.com/")
    request.add_header("Proxy-Authorization", "FooBar")
    request.add_header("User-Agent", "Grail")
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertIsNone(request._tunnel_host)

    opener.open(request)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertNotIn(("Proxy-Authorization", "FooBar"), sent_headers)
    self.assertIn(("User-Agent", "Grail"), sent_headers)
    self.assertIsNotNone(request._tunnel_host)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001032
def test_basic_auth(self, quote_char='"'):
    # Basic auth against an origin server; quote_char selects the quoting
    # used around the realm in the WWW-Authenticate challenge.
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))

    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)

    opener = OpenerDirector()
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
1048
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic-auth scenario with the realm in single quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1051
def test_proxy_basic_auth(self):
    # Basic auth against a proxy: the challenge is a 407 and the
    # credentials travel in the Proxy-authorization header.
    realm = "ACME Networks"

    password_manager = MockPasswordManager()
    proxy_handler = urllib.request.ProxyHandler(
        dict(http="proxy.example.com:3128"))
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)

    opener = OpenerDirector()
    for handler in (proxy_handler, auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1068
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            super().__init__()
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("digest")
            super().http_error_401(*args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            self.parent.record("basic")
            super().http_error_401(*args, **kwds)

    realm = "ACME Networks"
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)

    opener = RecordingOpenerDirector()
    # Basic is added before digest on purpose; the recorded call order is
    # checked below.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)

    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001115
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared driver for the basic-auth tests: registers credentials, opens
    # request_url, and checks that the second request carries them in
    # auth_header; then repeats with no credentials available.
    import base64
    user, password = "wile", "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    for expected, attribute in ((realm, "realm"),
                                (request_url, "url"),
                                (user, "user"),
                                (password, "password")):
        self.assertEqual(expected, getattr(password_manager, attribute))

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    first, second = http_handler.requests
    self.assertFalse(first.has_header(auth_header))

    credentials = ('%s:%s' % (user, password)).encode("ascii")
    expected_value = 'Basic ' + base64.b64encode(credentials).decode()
    self.assertEqual(second.get_header(auth_header), expected_value)
    self.assertEqual(second.unredirected_hdrs[auth_header], expected_value)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1152
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001153
class MiscTests(unittest.TestCase):
    # Tests for urllib.request.build_opener().

    def test_build_opener(self):
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass

        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def opener_has_handler(self, opener, handler_class):
        # Fail unless one of opener.handlers is an instance of exactly
        # handler_class (subclasses do not count).  The failure message
        # names the missing class.
        for h in opener.handlers:
            if h.__class__ == handler_class:
                return
        self.fail("opener has no handler of class %s"
                  % handler_class.__name__)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001199
class RequestTests(unittest.TestCase):
    # Accessor tests for urllib.request.Request, run against one GET-style
    # and one POST-style request created fresh for every test.

    def setUp(self):
        url = "http://www.python.org/~jeremy/"
        self.get = Request(url)
        self.post = Request(url, "data", headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        request = self.get
        self.assertFalse(request.has_data())
        self.assertEqual("GET", request.get_method())
        # Attaching data turns the request into a POST.
        request.add_data("spam")
        self.assertTrue(request.has_data())
        self.assertEqual("POST", request.get_method())

    def test_get_full_url(self):
        full_url = self.get.get_full_url()
        self.assertEqual("http://www.python.org/~jeremy/", full_url)

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.get_selector())
        root = Request("http://www.python.org/")
        self.assertEqual("/", root.get_selector())

    def test_get_type(self):
        self.assertEqual("http", self.get.get_type())

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.get_host())

    def test_get_host_unquote(self):
        # A percent-escaped character in the host is decoded.
        escaped = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", escaped.get_host())

    def test_proxy(self):
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        # The origin host is remembered; get_host() now names the proxy.
        self.assertEqual("www.python.org", request.get_origin_req_host())
        self.assertEqual("www.perl.org", request.get_host())
1244
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001245
def test_main(verbose=None):
    # Run the doctests of this module and of urllib.request, then every
    # unittest class defined in this file.
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    )


if __name__ == "__main__":
    test_main(verbose=True)