blob: a0fff26f1e7ad737d927615e9cf121f9a3abeb64 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016class TrivialTests(unittest.TestCase):
17 def test_trivial(self):
18 # A couple trivial tests
Guido van Rossume2ae77b2001-10-24 20:42:55 +000019
Jeremy Hylton1afc1692008-06-18 20:49:58 +000020 self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
Tim Peters861adac2001-07-16 20:49:49 +000021
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000022 # XXX Name hacking to get this to work on Windows.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000023 fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000024 if fname[1:2] == ":":
25 fname = fname[2:]
26 # And more hacking to get it to work on MacOS. This assumes
27 # urllib.pathname2url works, unfortunately...
28 if os.name == 'mac':
29 fname = '/' + fname.replace(':', '/')
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000030
31 file_url = "file://%s" % fname
Jeremy Hylton1afc1692008-06-18 20:49:58 +000032 f = urllib.request.urlopen(file_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000033
34 buf = f.read()
35 f.close()
Tim Petersf5f32b42005-07-17 23:16:17 +000036
Georg Brandle1b13d22005-08-24 22:20:32 +000037 def test_parse_http_list(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000038 tests = [
39 ('a,b,c', ['a', 'b', 'c']),
40 ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
41 ('a, b, "c", "d", "e,f", g, h',
42 ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
43 ('a="b\\"c", d="e\\,f", g="h\\\\i"',
44 ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
Georg Brandle1b13d22005-08-24 22:20:32 +000045 for string, list in tests:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000046 self.assertEquals(urllib.request.parse_http_list(string), list)
Georg Brandle1b13d22005-08-24 22:20:32 +000047
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000048
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000049def test_request_headers_dict():
50 """
51 The Request.headers dictionary is not a documented interface. It should
52 stay that way, because the complete set of headers are only accessible
53 through the .get_header(), .has_header(), .header_items() interface.
54 However, .headers pre-dates those methods, and so real code will be using
55 the dictionary.
56
57 The introduction in 2.4 of those methods was a mistake for the same reason:
58 code that previously saw all (urllib2 user)-provided headers in .headers
59 now sees only a subset (and the function interface is ugly and incomplete).
60 A better change would have been to replace .headers dict with a dict
61 subclass (or UserDict.DictMixin instance?) that preserved the .headers
62 interface and also provided access to the "unredirected" headers. It's
63 probably too late to fix that, though.
64
65
66 Check .capitalize() case normalization:
67
68 >>> url = "http://example.com"
69 >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
70 'blah'
71 >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
72 'blah'
73
74 Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
75 but that could be changed in future.
76
77 """
78
79def test_request_headers_methods():
80 """
81 Note the case normalization of header names here, to .capitalize()-case.
82 This should be preserved for backwards-compatibility. (In the HTTP case,
83 normalization to .title()-case is done by urllib2 before sending headers to
Georg Brandl24420152008-05-26 16:32:26 +000084 http.client).
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000085
86 >>> url = "http://example.com"
87 >>> r = Request(url, headers={"Spam-eggs": "blah"})
88 >>> r.has_header("Spam-eggs")
89 True
90 >>> r.header_items()
91 [('Spam-eggs', 'blah')]
92 >>> r.add_header("Foo-Bar", "baz")
Guido van Rossumcc2b0162007-02-11 06:12:03 +000093 >>> items = sorted(r.header_items())
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000094 >>> items
95 [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
96
97 Note that e.g. r.has_header("spam-EggS") is currently False, and
98 r.get_header("spam-EggS") returns None, but that could be changed in
99 future.
100
101 >>> r.has_header("Not-there")
102 False
Guido van Rossum7131f842007-02-09 20:13:25 +0000103 >>> print(r.get_header("Not-there"))
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000104 None
105 >>> r.get_header("Not-there", "default")
106 'default'
107
108 """
109
110
Thomas Wouters477c8d52006-05-27 19:21:47 +0000111def test_password_manager(self):
112 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters477c8d52006-05-27 19:21:47 +0000114 >>> add = mgr.add_password
115 >>> add("Some Realm", "http://example.com/", "joe", "password")
116 >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
117 >>> add("c", "http://example.com/foo", "foo", "ni")
118 >>> add("c", "http://example.com/bar", "bar", "nini")
119 >>> add("b", "http://example.com/", "first", "blah")
120 >>> add("b", "http://example.com/", "second", "spam")
121 >>> add("a", "http://example.com", "1", "a")
122 >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
123 >>> add("Some Realm", "d.example.com", "4", "d")
124 >>> add("Some Realm", "e.example.com:3128", "5", "e")
125
126 >>> mgr.find_user_password("Some Realm", "example.com")
127 ('joe', 'password')
128 >>> mgr.find_user_password("Some Realm", "http://example.com")
129 ('joe', 'password')
130 >>> mgr.find_user_password("Some Realm", "http://example.com/")
131 ('joe', 'password')
132 >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
133 ('joe', 'password')
134 >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
135 ('joe', 'password')
136 >>> mgr.find_user_password("c", "http://example.com/foo")
137 ('foo', 'ni')
138 >>> mgr.find_user_password("c", "http://example.com/bar")
139 ('bar', 'nini')
140
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000141 Actually, this is really undefined ATM
142## Currently, we use the highest-level path where more than one match:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000143
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000144## >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
145## ('joe', 'password')
Thomas Wouters477c8d52006-05-27 19:21:47 +0000146
147 Use latest add_password() in case of conflict:
148
149 >>> mgr.find_user_password("b", "http://example.com/")
150 ('second', 'spam')
151
152 No special relationship between a.example.com and example.com:
153
154 >>> mgr.find_user_password("a", "http://example.com/")
155 ('1', 'a')
156 >>> mgr.find_user_password("a", "http://a.example.com/")
157 (None, None)
158
159 Ports:
160
161 >>> mgr.find_user_password("Some Realm", "c.example.com")
162 (None, None)
163 >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
164 ('3', 'c')
165 >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
166 ('3', 'c')
167 >>> mgr.find_user_password("Some Realm", "d.example.com")
168 ('4', 'd')
169 >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
170 ('5', 'e')
171
172 """
173 pass
174
175
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000176def test_password_manager_default_port(self):
177 """
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000178 >>> mgr = urllib.request.HTTPPasswordMgr()
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000179 >>> add = mgr.add_password
180
181 The point to note here is that we can't guess the default port if there's
182 no scheme. This applies to both add_password and find_user_password.
183
184 >>> add("f", "http://g.example.com:80", "10", "j")
185 >>> add("g", "http://h.example.com", "11", "k")
186 >>> add("h", "i.example.com:80", "12", "l")
187 >>> add("i", "j.example.com", "13", "m")
188 >>> mgr.find_user_password("f", "g.example.com:100")
189 (None, None)
190 >>> mgr.find_user_password("f", "g.example.com:80")
191 ('10', 'j')
192 >>> mgr.find_user_password("f", "g.example.com")
193 (None, None)
194 >>> mgr.find_user_password("f", "http://g.example.com:100")
195 (None, None)
196 >>> mgr.find_user_password("f", "http://g.example.com:80")
197 ('10', 'j')
198 >>> mgr.find_user_password("f", "http://g.example.com")
199 ('10', 'j')
200 >>> mgr.find_user_password("g", "h.example.com")
201 ('11', 'k')
202 >>> mgr.find_user_password("g", "h.example.com:80")
203 ('11', 'k')
204 >>> mgr.find_user_password("g", "http://h.example.com:80")
205 ('11', 'k')
206 >>> mgr.find_user_password("h", "i.example.com")
207 (None, None)
208 >>> mgr.find_user_password("h", "i.example.com:80")
209 ('12', 'l')
210 >>> mgr.find_user_password("h", "http://i.example.com:80")
211 ('12', 'l')
212 >>> mgr.find_user_password("i", "j.example.com")
213 ('13', 'm')
214 >>> mgr.find_user_password("i", "j.example.com:80")
215 (None, None)
216 >>> mgr.find_user_password("i", "http://j.example.com")
217 ('13', 'm')
218 >>> mgr.find_user_password("i", "http://j.example.com:80")
219 (None, None)
220
221 """
222
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000223class MockOpener:
224 addheaders = []
Senthil Kumarane9da06f2009-07-19 04:20:12 +0000225 def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
226 self.req, self.data, self.timeout = req, data, timeout
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000227 def error(self, proto, *args):
228 self.proto, self.args = proto, args
229
230class MockFile:
231 def read(self, count=None): pass
232 def readline(self, count=None): pass
233 def close(self): pass
234
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000235class MockHeaders(dict):
236 def getheaders(self, name):
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000237 return list(self.values())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000238
Guido van Rossum34d19282007-08-09 01:03:29 +0000239class MockResponse(io.StringIO):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000240 def __init__(self, code, msg, headers, data, url=None):
Guido van Rossum34d19282007-08-09 01:03:29 +0000241 io.StringIO.__init__(self, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000242 self.code, self.msg, self.headers, self.url = code, msg, headers, url
243 def info(self):
244 return self.headers
245 def geturl(self):
246 return self.url
247
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000248class MockCookieJar:
249 def add_cookie_header(self, request):
250 self.ach_req = request
251 def extract_cookies(self, response, request):
252 self.ec_req, self.ec_r = request, response
253
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000254class FakeMethod:
255 def __init__(self, meth_name, action, handle):
256 self.meth_name = meth_name
257 self.handle = handle
258 self.action = action
259 def __call__(self, *args):
260 return self.handle(self.meth_name, self.action, *args)
261
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +0000262class MockHTTPResponse(io.IOBase):
263 def __init__(self, fp, msg, status, reason):
264 self.fp = fp
265 self.msg = msg
266 self.status = status
267 self.reason = reason
268 self.code = 200
269
270 def read(self):
271 return ''
272
273 def info(self):
274 return {}
275
276 def geturl(self):
277 return self.url
278
279
280class MockHTTPClass:
281 def __init__(self):
282 self.level = 0
283 self.req_headers = []
284 self.data = None
285 self.raise_on_endheaders = False
286 self._tunnel_headers = {}
287
288 def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
289 self.host = host
290 self.timeout = timeout
291 return self
292
293 def set_debuglevel(self, level):
294 self.level = level
295
296 def _set_tunnel(self, host, port=None, headers=None):
297 self._tunnel_host = host
298 self._tunnel_port = port
299 if headers:
300 self._tunnel_headers = headers
301 else:
302 self._tunnel_headers.clear()
303
Benjamin Peterson794921a2009-12-24 01:18:13 +0000304 def request(self, method, url, body=None, headers=None):
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +0000305 self.method = method
306 self.selector = url
Benjamin Peterson794921a2009-12-24 01:18:13 +0000307 if headers is not None:
308 self.req_headers += headers.items()
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +0000309 self.req_headers.sort()
310 if body:
311 self.data = body
312 if self.raise_on_endheaders:
313 import socket
314 raise socket.error()
315 def getresponse(self):
316 return MockHTTPResponse(MockFile(), {}, 200, "OK")
317
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000318class MockHandler:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000319 # useful for testing handler machinery
320 # see add_ordered_mock_handlers() docstring
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000321 handler_order = 500
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000322 def __init__(self, methods):
323 self._define_methods(methods)
324 def _define_methods(self, methods):
325 for spec in methods:
326 if len(spec) == 2: name, action = spec
327 else: name, action = spec, None
328 meth = FakeMethod(name, action, self.handle)
329 setattr(self.__class__, name, meth)
330 def handle(self, fn_name, action, *args, **kwds):
331 self.parent.calls.append((self, fn_name, args, kwds))
332 if action is None:
333 return None
334 elif action == "return self":
335 return self
336 elif action == "return response":
337 res = MockResponse(200, "OK", {}, "")
338 return res
339 elif action == "return request":
340 return Request("http://blah/")
341 elif action.startswith("error"):
342 code = action[action.rfind(" ")+1:]
343 try:
344 code = int(code)
345 except ValueError:
346 pass
347 res = MockResponse(200, "OK", {}, "")
348 return self.parent.error("http", args[0], res, code, "", {})
349 elif action == "raise":
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000350 raise urllib.error.URLError("blah")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000351 assert False
352 def close(self): pass
353 def add_parent(self, parent):
354 self.parent = parent
355 self.parent.calls = []
356 def __lt__(self, other):
357 if not hasattr(other, "handler_order"):
358 # No handler_order, leave in original order. Yuck.
359 return True
360 return self.handler_order < other.handler_order
361
362def add_ordered_mock_handlers(opener, meth_spec):
363 """Create MockHandlers and add them to an OpenerDirector.
364
365 meth_spec: list of lists of tuples and strings defining methods to define
366 on handlers. eg:
367
368 [["http_error", "ftp_open"], ["http_open"]]
369
370 defines methods .http_error() and .ftp_open() on one handler, and
371 .http_open() on another. These methods just record their arguments and
372 return None. Using a tuple instead of a string causes the method to
373 perform some action (see MockHandler.handle()), eg:
374
375 [["http_error"], [("http_open", "return request")]]
376
377 defines .http_error() on one handler (which simply returns None), and
378 .http_open() on another handler, which returns a Request object.
379
380 """
381 handlers = []
382 count = 0
383 for meths in meth_spec:
384 class MockHandlerSubclass(MockHandler): pass
385 h = MockHandlerSubclass(meths)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000386 h.handler_order += count
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000387 h.add_parent(opener)
388 count = count + 1
389 handlers.append(h)
390 opener.add_handler(h)
391 return handlers
392
Thomas Wouters477c8d52006-05-27 19:21:47 +0000393def build_test_opener(*handler_instances):
394 opener = OpenerDirector()
395 for h in handler_instances:
396 opener.add_handler(h)
397 return opener
398
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000399class MockHTTPHandler(urllib.request.BaseHandler):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000400 # useful for testing redirections and auth
401 # sends supplied headers and code as first response
402 # sends 200 OK as second response
403 def __init__(self, code, headers):
404 self.code = code
405 self.headers = headers
406 self.reset()
407 def reset(self):
408 self._count = 0
409 self.requests = []
410 def http_open(self, req):
Barry Warsaw820c1202008-06-12 04:06:45 +0000411 import email, http.client, copy
Guido van Rossum34d19282007-08-09 01:03:29 +0000412 from io import StringIO
Thomas Wouters477c8d52006-05-27 19:21:47 +0000413 self.requests.append(copy.deepcopy(req))
414 if self._count == 0:
415 self._count = self._count + 1
Georg Brandl24420152008-05-26 16:32:26 +0000416 name = http.client.responses[self.code]
Barry Warsaw820c1202008-06-12 04:06:45 +0000417 msg = email.message_from_string(self.headers)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000418 return self.parent.error(
419 "http", req, MockFile(), self.code, name, msg)
420 else:
421 self.req = req
Barry Warsaw820c1202008-06-12 04:06:45 +0000422 msg = email.message_from_string("\r\n\r\n")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000423 return MockResponse(200, "OK", msg, "", req.get_full_url())
424
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +0000425class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
426 # Useful for testing the Proxy-Authorization request by verifying the
427 # properties of httpcon
Benjamin Peterson794921a2009-12-24 01:18:13 +0000428
429 def __init__(self):
430 urllib.request.AbstractHTTPHandler.__init__(self)
431 self.httpconn = MockHTTPClass()
432
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +0000433 def https_open(self, req):
434 return self.do_open(self.httpconn, req)
435
Thomas Wouters477c8d52006-05-27 19:21:47 +0000436class MockPasswordManager:
437 def add_password(self, realm, uri, user, password):
438 self.realm = realm
439 self.url = uri
440 self.user = user
441 self.password = password
442 def find_user_password(self, realm, authuri):
443 self.target_realm = realm
444 self.target_url = authuri
445 return self.user, self.password
446
447
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000448class OpenerDirectorTests(unittest.TestCase):
449
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000450 def test_add_non_handler(self):
451 class NonHandler(object):
452 pass
453 self.assertRaises(TypeError,
454 OpenerDirector().add_handler, NonHandler())
455
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000456 def test_badly_named_methods(self):
457 # test work-around for three methods that accidentally follow the
458 # naming conventions for handler methods
459 # (*_open() / *_request() / *_response())
460
461 # These used to call the accidentally-named methods, causing a
462 # TypeError in real code; here, returning self from these mock
463 # methods would either cause no exception, or AttributeError.
464
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000465 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000466
467 o = OpenerDirector()
468 meth_spec = [
469 [("do_open", "return self"), ("proxy_open", "return self")],
470 [("redirect_request", "return self")],
471 ]
472 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000473 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000474 for scheme in "do", "proxy", "redirect":
475 self.assertRaises(URLError, o.open, scheme+"://example.com/")
476
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000477 def test_handled(self):
478 # handler returning non-None means no more handlers will be called
479 o = OpenerDirector()
480 meth_spec = [
481 ["http_open", "ftp_open", "http_error_302"],
482 ["ftp_open"],
483 [("http_open", "return self")],
484 [("http_open", "return self")],
485 ]
486 handlers = add_ordered_mock_handlers(o, meth_spec)
487
488 req = Request("http://example.com/")
489 r = o.open(req)
490 # Second .http_open() gets called, third doesn't, since second returned
491 # non-None. Handlers without .http_open() never get any methods called
492 # on them.
493 # In fact, second mock handler defining .http_open() returns self
494 # (instead of response), which becomes the OpenerDirector's return
495 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000496 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000497 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
498 for expected, got in zip(calls, o.calls):
499 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000500 self.assertEqual((handler, name), expected)
501 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000502
503 def test_handler_order(self):
504 o = OpenerDirector()
505 handlers = []
506 for meths, handler_order in [
507 ([("http_open", "return self")], 500),
508 (["http_open"], 0),
509 ]:
510 class MockHandlerSubclass(MockHandler): pass
511 h = MockHandlerSubclass(meths)
512 h.handler_order = handler_order
513 handlers.append(h)
514 o.add_handler(h)
515
516 r = o.open("http://example.com/")
517 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000518 self.assertEqual(o.calls[0][0], handlers[1])
519 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000520
521 def test_raise(self):
522 # raising URLError stops processing of request
523 o = OpenerDirector()
524 meth_spec = [
525 [("http_open", "raise")],
526 [("http_open", "return self")],
527 ]
528 handlers = add_ordered_mock_handlers(o, meth_spec)
529
530 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000531 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000532 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000533
534## def test_error(self):
535## # XXX this doesn't actually seem to be used in standard library,
536## # but should really be tested anyway...
537
538 def test_http_error(self):
539 # XXX http_error_default
540 # http errors are a special case
541 o = OpenerDirector()
542 meth_spec = [
543 [("http_open", "error 302")],
544 [("http_error_400", "raise"), "http_open"],
545 [("http_error_302", "return response"), "http_error_303",
546 "http_error"],
547 [("http_error_302")],
548 ]
549 handlers = add_ordered_mock_handlers(o, meth_spec)
550
551 class Unknown:
552 def __eq__(self, other): return True
553
554 req = Request("http://example.com/")
555 r = o.open(req)
556 assert len(o.calls) == 2
557 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000558 (handlers[2], "http_error_302",
559 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000560 for expected, got in zip(calls, o.calls):
561 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000562 self.assertEqual((handler, method_name), got[:2])
563 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000564
565 def test_processors(self):
566 # *_request / *_response methods get called appropriately
567 o = OpenerDirector()
568 meth_spec = [
569 [("http_request", "return request"),
570 ("http_response", "return response")],
571 [("http_request", "return request"),
572 ("http_response", "return response")],
573 ]
574 handlers = add_ordered_mock_handlers(o, meth_spec)
575
576 req = Request("http://example.com/")
577 r = o.open(req)
578 # processor methods are called on *all* handlers that define them,
579 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000580 calls = [
581 (handlers[0], "http_request"), (handlers[1], "http_request"),
582 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000583
584 for i, (handler, name, args, kwds) in enumerate(o.calls):
585 if i < 2:
586 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000587 self.assertEqual((handler, name), calls[i])
588 self.assertEqual(len(args), 1)
Georg Brandlab91fde2009-08-13 08:51:18 +0000589 self.assertTrue(isinstance(args[0], Request))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000590 else:
591 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000592 self.assertEqual((handler, name), calls[i])
593 self.assertEqual(len(args), 2)
Georg Brandlab91fde2009-08-13 08:51:18 +0000594 self.assertTrue(isinstance(args[0], Request))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000595 # response from opener.open is None, because there's no
596 # handler that defines http_open to handle it
Georg Brandlab91fde2009-08-13 08:51:18 +0000597 self.assertTrue(args[1] is None or
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000598 isinstance(args[1], MockResponse))
599
600
Tim Peters58eb11c2004-01-18 20:29:55 +0000601def sanepathname2url(path):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000602 urlpath = urllib.request.pathname2url(path)
Tim Peters58eb11c2004-01-18 20:29:55 +0000603 if os.name == "nt" and urlpath.startswith("///"):
604 urlpath = urlpath[2:]
605 # XXX don't ask me about the mac...
606 return urlpath
607
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000608class HandlerTests(unittest.TestCase):
609
610 def test_ftp(self):
611 class MockFTPWrapper:
612 def __init__(self, data): self.data = data
613 def retrfile(self, filename, filetype):
614 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000615 return io.StringIO(self.data), len(self.data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000617 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000618 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000619 def connect_ftp(self, user, passwd, host, port, dirs,
620 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000621 self.user, self.passwd = user, passwd
622 self.host, self.port = host, port
623 self.dirs = dirs
624 self.ftpwrapper = MockFTPWrapper(self.data)
625 return self.ftpwrapper
626
Georg Brandlf78e02b2008-06-10 17:40:04 +0000627 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000628 data = "rheum rhaponicum"
629 h = NullFTPHandler(data)
630 o = h.parent = MockOpener()
631
632 for url, host, port, type_, dirs, filename, mimetype in [
633 ("ftp://localhost/foo/bar/baz.html",
634 "localhost", ftplib.FTP_PORT, "I",
635 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000636 ("ftp://localhost:80/foo/bar/",
637 "localhost", 80, "D",
638 ["foo", "bar"], "", None),
639 ("ftp://localhost/baz.gif;type=a",
640 "localhost", ftplib.FTP_PORT, "A",
641 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000642 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000643 req = Request(url)
644 req.timeout = None
645 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000646 # ftp authentication not yet implemented by FTPHandler
Georg Brandlab91fde2009-08-13 08:51:18 +0000647 self.assertTrue(h.user == h.passwd == "")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000648 self.assertEqual(h.host, socket.gethostbyname(host))
649 self.assertEqual(h.port, port)
650 self.assertEqual(h.dirs, dirs)
651 self.assertEqual(h.ftpwrapper.filename, filename)
652 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000653 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000654 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000655 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000656
657 def test_file(self):
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000658 import email.utils, socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000659 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000660 o = h.parent = MockOpener()
661
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000662 TESTFN = support.TESTFN
Tim Peters58eb11c2004-01-18 20:29:55 +0000663 urlpath = sanepathname2url(os.path.abspath(TESTFN))
Guido van Rossum6a2ccd02007-07-16 20:51:57 +0000664 towrite = b"hello, world\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000665 urls = [
Tim Peters58eb11c2004-01-18 20:29:55 +0000666 "file://localhost%s" % urlpath,
667 "file://%s" % urlpath,
668 "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000669 ]
670 try:
671 localaddr = socket.gethostbyname(socket.gethostname())
672 except socket.gaierror:
673 localaddr = ''
674 if localaddr:
675 urls.append("file://%s%s" % (localaddr, urlpath))
676
677 for url in urls:
Tim Peters58eb11c2004-01-18 20:29:55 +0000678 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000679 try:
680 try:
681 f.write(towrite)
682 finally:
683 f.close()
684
685 r = h.file_open(Request(url))
686 try:
687 data = r.read()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000688 headers = r.info()
689 newurl = r.geturl()
690 finally:
691 r.close()
Tim Peters58eb11c2004-01-18 20:29:55 +0000692 stats = os.stat(TESTFN)
Benjamin Petersona0c0a4a2008-06-12 22:15:50 +0000693 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000694 finally:
695 os.remove(TESTFN)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000696 self.assertEqual(data, towrite)
697 self.assertEqual(headers["Content-type"], "text/plain")
698 self.assertEqual(headers["Content-length"], "13")
Tim Peters58eb11c2004-01-18 20:29:55 +0000699 self.assertEqual(headers["Last-modified"], modified)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000700
701 for url in [
Tim Peters58eb11c2004-01-18 20:29:55 +0000702 "file://localhost:80%s" % urlpath,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000703 "file:///file_does_not_exist.txt",
704 "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
705 os.getcwd(), TESTFN),
706 "file://somerandomhost.ontheinternet.com%s/%s" %
707 (os.getcwd(), TESTFN),
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000708 ]:
709 try:
Tim Peters58eb11c2004-01-18 20:29:55 +0000710 f = open(TESTFN, "wb")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000711 try:
712 f.write(towrite)
713 finally:
714 f.close()
715
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000716 self.assertRaises(urllib.error.URLError,
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000717 h.file_open, Request(url))
718 finally:
719 os.remove(TESTFN)
720
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000721 h = urllib.request.FileHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000722 o = h.parent = MockOpener()
723 # XXXX why does // mean ftp (and /// mean not ftp!), and where
724 # is file: scheme specified? I think this is really a bug, and
725 # what was intended was to distinguish between URLs like:
726 # file:/blah.txt (a file)
727 # file://localhost/blah.txt (a file)
728 # file:///blah.txt (a file)
729 # file://ftp.example.com/blah.txt (an ftp URL)
730 for url, ftp in [
731 ("file://ftp.example.com//foo.txt", True),
732 ("file://ftp.example.com///foo.txt", False),
733# XXXX bug: fails with OSError, should be URLError
734 ("file://ftp.example.com/foo.txt", False),
735 ]:
736 req = Request(url)
737 try:
738 h.file_open(req)
739 # XXXX remove OSError when bug fixed
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000740 except (urllib.error.URLError, OSError):
Georg Brandlab91fde2009-08-13 08:51:18 +0000741 self.assertTrue(not ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000742 else:
Georg Brandlab91fde2009-08-13 08:51:18 +0000743 self.assertTrue(o.req is req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000744 self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000745
746 def test_http(self):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000747
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000748 h = urllib.request.AbstractHTTPHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000749 o = h.parent = MockOpener()
750
751 url = "http://example.com/"
752 for method, data in [("GET", None), ("POST", "blah")]:
753 req = Request(url, data, {"Foo": "bar"})
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000754 req.timeout = None
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000755 req.add_unredirected_header("Spam", "eggs")
756 http = MockHTTPClass()
757 r = h.do_open(http, req)
758
759 # result attributes
760 r.read; r.readline # wrapped MockFile methods
761 r.info; r.geturl # addinfourl methods
762 r.code, r.msg == 200, "OK" # added from MockHTTPClass.getreply()
763 hdrs = r.info()
Guido van Rossume2b70bc2006-08-18 22:13:04 +0000764 hdrs.get; hdrs.__contains__ # r.info() gives dict from .getreply()
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000765 self.assertEqual(r.geturl(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000766
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000767 self.assertEqual(http.host, "example.com")
768 self.assertEqual(http.level, 0)
769 self.assertEqual(http.method, method)
770 self.assertEqual(http.selector, "/")
771 self.assertEqual(http.req_headers,
Jeremy Hyltonb3ee6f92004-02-24 19:40:35 +0000772 [("Connection", "close"),
773 ("Foo", "bar"), ("Spam", "eggs")])
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000774 self.assertEqual(http.data, data)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000775
776 # check socket.error converted to URLError
777 http.raise_on_endheaders = True
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000778 self.assertRaises(urllib.error.URLError, h.do_open, http, req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000779
780 # check adding of standard headers
781 o.addheaders = [("Spam", "eggs")]
782 for data in "", None: # POST, GET
783 req = Request("http://example.com/", data)
784 r = MockResponse(200, "OK", {}, "")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000785 newreq = h.do_request_(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000786 if data is None: # GET
Georg Brandlab91fde2009-08-13 08:51:18 +0000787 self.assertTrue("Content-length" not in req.unredirected_hdrs)
788 self.assertTrue("Content-type" not in req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000789 else: # POST
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000790 self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
791 self.assertEqual(req.unredirected_hdrs["Content-type"],
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000792 "application/x-www-form-urlencoded")
793 # XXX the details of Host could be better tested
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000794 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
795 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000796
797 # don't clobber existing headers
798 req.add_unredirected_header("Content-length", "foo")
799 req.add_unredirected_header("Content-type", "bar")
800 req.add_unredirected_header("Host", "baz")
801 req.add_unredirected_header("Spam", "foo")
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000802 newreq = h.do_request_(req)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000803 self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
804 self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000805 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
806 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000807
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000808 def test_http_doubleslash(self):
809 # Checks the presence of any unnecessary double slash in url does not
810 # break anything. Previously, a double slash directly after the host
811 # could could cause incorrect parsing.
812 h = urllib.request.AbstractHTTPHandler()
813 o = h.parent = MockOpener()
814
815 data = ""
816 ds_urls = [
817 "http://example.com/foo/bar/baz.html",
818 "http://example.com//foo/bar/baz.html",
819 "http://example.com/foo//bar/baz.html",
820 "http://example.com/foo/bar//baz.html"
821 ]
822
823 for ds_url in ds_urls:
824 ds_req = Request(ds_url, data)
825
826 # Check whether host is determined correctly if there is no proxy
827 np_ds_req = h.do_request_(ds_req)
828 self.assertEqual(np_ds_req.unredirected_hdrs["Host"],"example.com")
829
830 # Check whether host is determined correctly if there is a proxy
831 ds_req.set_proxy("someproxy:3128",None)
832 p_ds_req = h.do_request_(ds_req)
833 self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
834
835
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000836 def test_errors(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000837 h = urllib.request.HTTPErrorProcessor()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000838 o = h.parent = MockOpener()
839
840 url = "http://example.com/"
841 req = Request(url)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842 # all 2xx are passed through
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000843 r = MockResponse(200, "OK", {}, "", url)
844 newr = h.http_response(req, r)
Georg Brandlab91fde2009-08-13 08:51:18 +0000845 self.assertTrue(r is newr)
846 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000847 r = MockResponse(202, "Accepted", {}, "", url)
848 newr = h.http_response(req, r)
Georg Brandlab91fde2009-08-13 08:51:18 +0000849 self.assertTrue(r is newr)
850 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000851 r = MockResponse(206, "Partial content", {}, "", url)
852 newr = h.http_response(req, r)
Georg Brandlab91fde2009-08-13 08:51:18 +0000853 self.assertTrue(r is newr)
854 self.assertTrue(not hasattr(o, "proto")) # o.error not called
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000855 # anything else calls o.error (and MockOpener returns None, here)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000856 r = MockResponse(502, "Bad gateway", {}, "", url)
Georg Brandlab91fde2009-08-13 08:51:18 +0000857 self.assertTrue(h.http_response(req, r) is None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000858 self.assertEqual(o.proto, "http") # o.error called
Guido van Rossumd8faa362007-04-27 19:54:29 +0000859 self.assertEqual(o.args, (req, r, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000860
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000861 def test_cookies(self):
862 cj = MockCookieJar()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000863 h = urllib.request.HTTPCookieProcessor(cj)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000864 o = h.parent = MockOpener()
865
866 req = Request("http://example.com/")
867 r = MockResponse(200, "OK", {}, "")
868 newreq = h.http_request(req)
Georg Brandlab91fde2009-08-13 08:51:18 +0000869 self.assertTrue(cj.ach_req is req is newreq)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000870 self.assertEquals(req.get_origin_req_host(), "example.com")
Georg Brandlab91fde2009-08-13 08:51:18 +0000871 self.assertTrue(not req.is_unverifiable())
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000872 newr = h.http_response(req, r)
Georg Brandlab91fde2009-08-13 08:51:18 +0000873 self.assertTrue(cj.ec_req is req)
874 self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000875
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000876 def test_redirect(self):
877 from_url = "http://example.com/a.html"
878 to_url = "http://example.com/b.html"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000879 h = urllib.request.HTTPRedirectHandler()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000880 o = h.parent = MockOpener()
881
882 # ordinary redirect behaviour
883 for code in 301, 302, 303, 307:
884 for data in None, "blah\nblah\n":
885 method = getattr(h, "http_error_%s" % code)
886 req = Request(from_url, data)
Senthil Kumarane9da06f2009-07-19 04:20:12 +0000887 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000888 req.add_header("Nonsense", "viking=withhold")
Christian Heimes77c02eb2008-02-09 02:18:51 +0000889 if data is not None:
890 req.add_header("Content-Length", str(len(data)))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000891 req.add_unredirected_header("Spam", "spam")
892 try:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000893 method(req, MockFile(), code, "Blah",
894 MockHeaders({"location": to_url}))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000895 except urllib.error.HTTPError:
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000896 # 307 in response to POST requires user OK
Georg Brandlab91fde2009-08-13 08:51:18 +0000897 self.assertTrue(code == 307 and data is not None)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000898 self.assertEqual(o.req.get_full_url(), to_url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000899 try:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000900 self.assertEqual(o.req.get_method(), "GET")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000901 except AttributeError:
Georg Brandlab91fde2009-08-13 08:51:18 +0000902 self.assertTrue(not o.req.has_data())
Christian Heimes77c02eb2008-02-09 02:18:51 +0000903
904 # now it's a GET, there should not be headers regarding content
905 # (possibly dragged from before being a POST)
906 headers = [x.lower() for x in o.req.headers]
907 self.assertTrue("content-length" not in headers)
908 self.assertTrue("content-type" not in headers)
909
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000910 self.assertEqual(o.req.headers["Nonsense"],
911 "viking=withhold")
Georg Brandlab91fde2009-08-13 08:51:18 +0000912 self.assertTrue("Spam" not in o.req.headers)
913 self.assertTrue("Spam" not in o.req.unredirected_hdrs)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000914
915 # loop detection
916 req = Request(from_url)
Senthil Kumarane9da06f2009-07-19 04:20:12 +0000917 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000918 def redirect(h, req, url=to_url):
919 h.http_error_302(req, MockFile(), 302, "Blah",
920 MockHeaders({"location": url}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000921 # Note that the *original* request shares the same record of
922 # redirections with the sub-requests caused by the redirections.
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000923
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000924 # detect infinite loop redirect of a URL to itself
925 req = Request(from_url, origin_req_host="example.com")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000926 count = 0
Senthil Kumarane9da06f2009-07-19 04:20:12 +0000927 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000928 try:
929 while 1:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000930 redirect(h, req, "http://example.com/")
931 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000932 except urllib.error.HTTPError:
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000933 # don't stop until max_repeats, because cookies may introduce state
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000934 self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000935
936 # detect endless non-repeating chain of redirects
937 req = Request(from_url, origin_req_host="example.com")
938 count = 0
Senthil Kumarane9da06f2009-07-19 04:20:12 +0000939 req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000940 try:
941 while 1:
942 redirect(h, req, "http://example.com/%d" % count)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000943 count = count + 1
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000944 except urllib.error.HTTPError:
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000945 self.assertEqual(count,
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000946 urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000947
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000948 def test_cookie_redirect(self):
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000949 # cookies shouldn't leak into redirected requests
Georg Brandl24420152008-05-26 16:32:26 +0000950 from http.cookiejar import CookieJar
951 from test.test_http_cookiejar import interact_netscape
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000952
953 cj = CookieJar()
954 interact_netscape(cj, "http://www.example.com/", "spam=eggs")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000955 hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000956 hdeh = urllib.request.HTTPDefaultErrorHandler()
957 hrh = urllib.request.HTTPRedirectHandler()
958 cp = urllib.request.HTTPCookieProcessor(cj)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000959 o = build_test_opener(hh, hdeh, hrh, cp)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000960 o.open("http://www.example.com/")
Georg Brandlab91fde2009-08-13 08:51:18 +0000961 self.assertTrue(not hh.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000962
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000963 def test_proxy(self):
964 o = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000966 o.add_handler(ph)
967 meth_spec = [
968 [("http_open", "return response")]
969 ]
970 handlers = add_ordered_mock_handlers(o, meth_spec)
971
972 req = Request("http://acme.example.com/")
973 self.assertEqual(req.get_host(), "acme.example.com")
974 r = o.open(req)
975 self.assertEqual(req.get_host(), "proxy.example.com:3128")
976
977 self.assertEqual([(handlers[0], "http_open")],
978 [tup[0:2] for tup in o.calls])
979
Senthil Kumaran11301632009-10-11 06:07:46 +0000980 def test_proxy_no_proxy(self):
981 os.environ['no_proxy'] = 'python.org'
982 o = OpenerDirector()
983 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
984 o.add_handler(ph)
985 req = Request("http://www.perl.org/")
986 self.assertEqual(req.get_host(), "www.perl.org")
987 r = o.open(req)
988 self.assertEqual(req.get_host(), "proxy.example.com")
989 req = Request("http://www.python.org")
990 self.assertEqual(req.get_host(), "www.python.org")
991 r = o.open(req)
992 self.assertEqual(req.get_host(), "www.python.org")
993 del os.environ['no_proxy']
994
995
Senthil Kumaran0ac1f832009-07-26 12:39:47 +0000996 def test_proxy_https(self):
997 o = OpenerDirector()
998 ph = urllib.request.ProxyHandler(dict(https="proxy.example.com:3128"))
999 o.add_handler(ph)
1000 meth_spec = [
1001 [("https_open", "return response")]
1002 ]
1003 handlers = add_ordered_mock_handlers(o, meth_spec)
1004
1005 req = Request("https://www.example.com/")
1006 self.assertEqual(req.get_host(), "www.example.com")
1007 r = o.open(req)
1008 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1009 self.assertEqual([(handlers[0], "https_open")],
1010 [tup[0:2] for tup in o.calls])
1011
Senthil Kumaran4b9fbeb2009-12-20 07:18:22 +00001012 def test_proxy_https_proxy_authorization(self):
1013 o = OpenerDirector()
1014 ph = urllib.request.ProxyHandler(dict(https='proxy.example.com:3128'))
1015 o.add_handler(ph)
1016 https_handler = MockHTTPSHandler()
1017 o.add_handler(https_handler)
1018 req = Request("https://www.example.com/")
1019 req.add_header("Proxy-Authorization","FooBar")
1020 req.add_header("User-Agent","Grail")
1021 self.assertEqual(req.get_host(), "www.example.com")
1022 self.assertIsNone(req._tunnel_host)
1023 r = o.open(req)
1024 # Verify Proxy-Authorization gets tunneled to request.
1025 # httpsconn req_headers do not have the Proxy-Authorization header but
1026 # the req will have.
1027 self.assertFalse(("Proxy-Authorization","FooBar") in
1028 https_handler.httpconn.req_headers)
1029 self.assertTrue(("User-Agent","Grail") in
1030 https_handler.httpconn.req_headers)
1031 self.assertIsNotNone(req._tunnel_host)
1032 self.assertEqual(req.get_host(), "proxy.example.com:3128")
1033 self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
Senthil Kumaran0ac1f832009-07-26 12:39:47 +00001034
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001035 def test_basic_auth(self, quote_char='"'):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001036 opener = OpenerDirector()
1037 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001038 auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001039 realm = "ACME Widget Store"
1040 http_handler = MockHTTPHandler(
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001041 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
1042 (quote_char, realm, quote_char) )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001043 opener.add_handler(auth_handler)
1044 opener.add_handler(http_handler)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001045 self._test_basic_auth(opener, auth_handler, "Authorization",
1046 realm, http_handler, password_manager,
1047 "http://acme.example.com/protected",
1048 "http://acme.example.com/protected",
1049 )
1050
Christian Heimes4fbc72b2008-03-22 00:47:35 +00001051 def test_basic_auth_with_single_quoted_realm(self):
1052 self.test_basic_auth(quote_char="'")
1053
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 def test_proxy_basic_auth(self):
1055 opener = OpenerDirector()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001056 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057 opener.add_handler(ph)
1058 password_manager = MockPasswordManager()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001059 auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001060 realm = "ACME Networks"
1061 http_handler = MockHTTPHandler(
1062 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001063 opener.add_handler(auth_handler)
1064 opener.add_handler(http_handler)
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001065 self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
Thomas Wouters477c8d52006-05-27 19:21:47 +00001066 realm, http_handler, password_manager,
1067 "http://acme.example.com:3128/protected",
1068 "proxy.example.com:3128",
1069 )
1070
1071 def test_basic_and_digest_auth_handlers(self):
1072 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1073 # response (http://python.org/sf/1479302), where it should instead
1074 # return None to allow another handler (especially
1075 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001076
1077 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1078 # try digest first (since it's the strongest auth scheme), so we record
1079 # order of calls here to check digest comes first:
1080 class RecordingOpenerDirector(OpenerDirector):
1081 def __init__(self):
1082 OpenerDirector.__init__(self)
1083 self.recorded = []
1084 def record(self, info):
1085 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001086 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001087 def http_error_401(self, *args, **kwds):
1088 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001089 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001090 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001091 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001092 def http_error_401(self, *args, **kwds):
1093 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001094 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001095 *args, **kwds)
1096
1097 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 password_manager = MockPasswordManager()
1099 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001100 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001101 realm = "ACME Networks"
1102 http_handler = MockHTTPHandler(
1103 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001104 opener.add_handler(basic_handler)
1105 opener.add_handler(digest_handler)
1106 opener.add_handler(http_handler)
1107
1108 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001109 self._test_basic_auth(opener, basic_handler, "Authorization",
1110 realm, http_handler, password_manager,
1111 "http://acme.example.com/protected",
1112 "http://acme.example.com/protected",
1113 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001114 # check digest was tried before basic (twice, because
1115 # _test_basic_auth called .open() twice)
1116 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001117
1118 def _test_basic_auth(self, opener, auth_handler, auth_header,
1119 realm, http_handler, password_manager,
1120 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001121 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001122 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123
1124 # .add_password() fed through to password manager
1125 auth_handler.add_password(realm, request_url, user, password)
1126 self.assertEqual(realm, password_manager.realm)
1127 self.assertEqual(request_url, password_manager.url)
1128 self.assertEqual(user, password_manager.user)
1129 self.assertEqual(password, password_manager.password)
1130
1131 r = opener.open(request_url)
1132
1133 # should have asked the password manager for the username/password
1134 self.assertEqual(password_manager.target_realm, realm)
1135 self.assertEqual(password_manager.target_url, protected_url)
1136
1137 # expect one request without authorization, then one with
1138 self.assertEqual(len(http_handler.requests), 2)
1139 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001140 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001141 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001142 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001143 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1144 auth_hdr_value)
1145
1146 # if the password manager can't find a password, the handler won't
1147 # handle the HTTP auth error
1148 password_manager.user = password_manager.password = None
1149 http_handler.reset()
1150 r = opener.open(request_url)
1151 self.assertEqual(len(http_handler.requests), 1)
1152 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1153
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001154
1155class MiscTests(unittest.TestCase):
1156
1157 def test_build_opener(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001158 class MyHTTPHandler(urllib.request.HTTPHandler): pass
1159 class FooHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001160 def foo_open(self): pass
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001161 class BarHandler(urllib.request.BaseHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001162 def bar_open(self): pass
1163
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001164 build_opener = urllib.request.build_opener
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001165
1166 o = build_opener(FooHandler, BarHandler)
1167 self.opener_has_handler(o, FooHandler)
1168 self.opener_has_handler(o, BarHandler)
1169
1170 # can take a mix of classes and instances
1171 o = build_opener(FooHandler, BarHandler())
1172 self.opener_has_handler(o, FooHandler)
1173 self.opener_has_handler(o, BarHandler)
1174
1175 # subclasses of default handlers override default handlers
1176 o = build_opener(MyHTTPHandler)
1177 self.opener_has_handler(o, MyHTTPHandler)
1178
1179 # a particular case of overriding: default handlers can be passed
1180 # in explicitly
1181 o = build_opener()
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001182 self.opener_has_handler(o, urllib.request.HTTPHandler)
1183 o = build_opener(urllib.request.HTTPHandler)
1184 self.opener_has_handler(o, urllib.request.HTTPHandler)
1185 o = build_opener(urllib.request.HTTPHandler())
1186 self.opener_has_handler(o, urllib.request.HTTPHandler)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001187
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001188 # Issue2670: multiple handlers sharing the same base class
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001189 class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
Christian Heimes81ee3ef2008-05-04 22:42:01 +00001190 o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
1191 self.opener_has_handler(o, MyHTTPHandler)
1192 self.opener_has_handler(o, MyOtherHTTPHandler)
1193
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001194 def opener_has_handler(self, opener, handler_class):
1195 for h in opener.handlers:
1196 if h.__class__ == handler_class:
1197 break
1198 else:
Georg Brandlab91fde2009-08-13 08:51:18 +00001199 self.assertTrue(False)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001200
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001201class RequestTests(unittest.TestCase):
1202
1203 def setUp(self):
1204 self.get = Request("http://www.python.org/~jeremy/")
1205 self.post = Request("http://www.python.org/~jeremy/",
1206 "data",
1207 headers={"X-Test": "test"})
1208
1209 def test_method(self):
1210 self.assertEqual("POST", self.post.get_method())
1211 self.assertEqual("GET", self.get.get_method())
1212
1213 def test_add_data(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001214 self.assertTrue(not self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001215 self.assertEqual("GET", self.get.get_method())
1216 self.get.add_data("spam")
Georg Brandlab91fde2009-08-13 08:51:18 +00001217 self.assertTrue(self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001218 self.assertEqual("POST", self.get.get_method())
1219
1220 def test_get_full_url(self):
1221 self.assertEqual("http://www.python.org/~jeremy/",
1222 self.get.get_full_url())
1223
1224 def test_selector(self):
1225 self.assertEqual("/~jeremy/", self.get.get_selector())
1226 req = Request("http://www.python.org/")
1227 self.assertEqual("/", req.get_selector())
1228
1229 def test_get_type(self):
1230 self.assertEqual("http", self.get.get_type())
1231
1232 def test_get_host(self):
1233 self.assertEqual("www.python.org", self.get.get_host())
1234
1235 def test_get_host_unquote(self):
1236 req = Request("http://www.%70ython.org/")
1237 self.assertEqual("www.python.org", req.get_host())
1238
1239 def test_proxy(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001240 self.assertTrue(not self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001241 self.get.set_proxy("www.perl.org", "http")
Georg Brandlab91fde2009-08-13 08:51:18 +00001242 self.assertTrue(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001243 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1244 self.assertEqual("www.perl.org", self.get.get_host())
1245
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001246
1247def test_main(verbose=None):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001248 from test import test_urllib2
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001249 support.run_doctest(test_urllib2, verbose)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001250 support.run_doctest(urllib.request, verbose)
Andrew M. Kuchlingbd3200f2004-06-29 13:15:46 +00001251 tests = (TrivialTests,
1252 OpenerDirectorTests,
1253 HandlerTests,
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001254 MiscTests,
1255 RequestTests)
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001256 support.run_unittest(*tests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001257
1258if __name__ == "__main__":
1259 test_main(verbose=True)