blob: e6c51ecd6377fb1e9c0fc02bbfd6d4b613882c3b [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Jeremy Hyltone3e61042001-05-09 15:50:25 +00007
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
9from urllib.request import Request, OpenerDirector
Jeremy Hyltone3e61042001-05-09 15:50:25 +000010
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000011# XXX
12# Request
13# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000014# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
class TrivialTests(unittest.TestCase):
    """Smoke tests for urlopen() and parse_http_list()."""

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
        if fname[1:2] == ":":
            fname = fname[2:]
        # And more hacking to get it to work on MacOS. This assumes
        # urllib.pathname2url works, unfortunately...
        if os.name == 'mac':
            fname = '/' + fname.replace(':', '/')

        file_url = "file://%s" % fname
        f = urllib.request.urlopen(file_url)
        try:
            # Only checking that the read succeeds; the content is irrelevant.
            f.read()
        finally:
            # Close the handle even when read() raises.
            f.close()

    def test_parse_http_list(self):
        # Each case is (header value, expected list of elements).
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000047
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000048
def test_request_headers_dict():
    """
    Doctest for the undocumented Request.headers dictionary.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
78
def test_request_headers_methods():
    """
    Doctest for Request.has_header(), .get_header(), .header_items() and
    .add_header().

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    """
109
110
def test_password_manager(self):
    """
    Doctest for HTTPPasswordMgr.add_password() / find_user_password().

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined at the moment, so the example below
    is disabled:
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    pass
174
175
def test_password_manager_default_port(self):
    """
    Doctest for how HTTPPasswordMgr treats default ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
222
class MockOpener:
    """Stand-in opener that remembers the arguments of its last call."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Keep the arguments so a test can inspect them afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Same idea: just record what was reported.
        self.proto = proto
        self.args = args
229
class MockFile:
    """Inert file-like object: every method does nothing and returns None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
234
class MockHeaders(dict):
    """dict subclass whose getheaders() returns every stored value."""

    def getheaders(self, name):
        # The requested name is ignored; all values are returned.
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000238
class MockResponse(io.StringIO):
    """In-memory text response that also carries code, msg, headers and url."""

    def __init__(self, code, msg, headers, data, url=None):
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor (None by default)."""
        return self.url
247
class MockCookieJar:
    """Cookie jar double that only records the objects it is handed."""

    def add_cookie_header(self, request):
        self.ach_req = request

    def extract_cookies(self, response, request):
        self.ec_req = request
        self.ec_r = response
253
class FakeMethod:
    """Callable that forwards to `handle`, prefixing its own name and action."""

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.handle = handle
        self.action = action

    def __call__(self, *args):
        callback = self.handle
        return callback(self.meth_name, self.action, *args)
261
class MockHTTPResponse(io.IOBase):
    """Minimal HTTP response double with an empty body and empty headers."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason
        # code is fixed at 200 regardless of the status argument.
        self.code = 200

    def read(self):
        return ''

    def info(self):
        return {}

    def geturl(self):
        # url is not set by __init__; it must be assigned from outside
        # before this is called.
        return self.url
278
279
class MockHTTPClass:
    """HTTP connection double that records how it was used.

    Calling an instance stores the host and timeout and returns the instance
    itself.  request() records the method, selector, headers and body so that
    tests can inspect them afterwards.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        # When true, request() raises socket.error after recording the call.
        self.raise_on_endheaders = False
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        self.host = host
        self.timeout = timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def _set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            # Rebind instead of clear(): the current dict may be one that a
            # caller passed in earlier, and it must not be emptied behind
            # the caller's back.
            self._tunnel_headers = {}

    def request(self, method, url, body=None, headers=None):
        """Record a request.

        headers is an optional mapping; its items are appended to
        req_headers, which is then kept sorted.  Raises socket.error when
        raise_on_endheaders is set.
        """
        self.method = method
        self.selector = url
        if headers:
            self.req_headers += headers.items()
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise socket.error()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")
316
class MockHandler:
    """Handler double whose methods are generated from a spec list.

    Useful for testing handler machinery; see the
    add_ordered_mock_handlers() docstring for the spec format.
    """

    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per spec.

        A spec is either a method name (str), which gets no action, or a
        (name, action) pair.
        """
        for spec in methods:
            # Test the type rather than len(spec) == 2, so that a
            # two-character method name is not mistaken for a pair.
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # NOTE: the method is installed on the class, not the instance.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then carry out `action`.

        Raises urllib.error.URLError for the "raise" action and
        AssertionError for an unrecognised action.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # "error <code>": the text after the last space is the code; it
            # stays a string if it is not an integer.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Raise explicitly: an assert statement would be stripped under -O
        # and the unknown action would silently return None.
        raise AssertionError("unknown action %r" % (action,))

    def close(self):
        pass

    def add_parent(self, parent):
        self.parent = parent
        # Every call resets the parent's call log.
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
360
def add_ordered_mock_handlers(opener, meth_spec):
    """Create MockHandlers and register them on an OpenerDirector.

    meth_spec is a list with one entry per handler; each entry is a list of
    strings and tuples naming the methods to define on that handler.  eg:

    [["http_error", "ftp_open"], ["http_open"]]

    defines methods .http_error() and .ftp_open() on one handler, and
    .http_open() on another.  These methods just record their arguments and
    return None.  Using a tuple instead of a string causes the method to
    perform some action (see MockHandler.handle()), eg:

    [["http_error"], [("http_open", "return request")]]

    defines .http_error() on one handler (which simply returns None), and
    .http_open() on another handler, which returns a Request object.

    Returns the list of created handlers, in the order of meth_spec.
    """
    created = []
    for offset, meths in enumerate(meth_spec):
        # A fresh subclass per handler, since MockHandler installs the
        # generated methods on the class.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        # Later entries get a larger handler_order.
        handler.handler_order += offset
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
391
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with each given handler added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
397
class MockHTTPHandler(urllib.request.BaseHandler):
    """Handler double for testing redirections and auth.

    The first request is answered through parent.error() with the supplied
    code and headers; every later request gets a 200 OK MockResponse.
    """

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget every recorded request and start from the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        import copy
        import email
        import http.client

        # Keep a snapshot of each request as it looked when it arrived.
        self.requests.append(copy.deepcopy(req))
        if self._count != 0:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
        self._count += 1
        name = http.client.responses[self.code]
        msg = email.message_from_string(self.headers)
        return self.parent.error(
            "http", req, MockFile(), self.code, name, msg)
423
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpconn.
    # NOTE: httpconn is a class attribute, so one MockHTTPClass object is
    # shared by every MockHTTPSHandler instance.
    httpconn = MockHTTPClass()
    def https_open(self, req):
        # Pass the shared mock connection object to do_open().
        return self.do_open(self.httpconn, req)
430
class MockPasswordManager:
    """Password manager double holding a single credential.

    add_password() stores its arguments; find_user_password() records what
    was asked for and returns the stored user and password.
    """

    def add_password(self, realm, uri, user, password):
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        self.target_realm, self.target_url = realm, authuri
        return (self.user, self.password)
441
442
class OpenerDirectorTests(unittest.TestCase):
    """Tests of OpenerDirector's handler dispatch, driven by MockHandlers."""

    def test_add_non_handler(self):
        class NonHandler(object):
            pass
        self.assertRaises(TypeError,
                          OpenerDirector().add_handler, NonHandler())

    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib.error import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
            ]
        add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib.request.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme+"://example.com/")

    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second .http_open() gets called, third doesn't, since second returned
        # non-None.  Handlers without .http_open() never get any methods called
        # on them.
        # In fact, second mock handler defining .http_open() returns self
        # (instead of response), which becomes the OpenerDirector's return
        # value.
        self.assertEqual(r, handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        # zip() stops at the shorter sequence, so check the length first;
        # otherwise missing calls would go unnoticed.
        self.assertEqual(len(o.calls), len(calls))
        for expected, got in zip(calls, o.calls):
            handler, name, args, kwds = got
            self.assertEqual((handler, name), expected)
            self.assertEqual(args, (req,))

    def test_handler_order(self):
        o = OpenerDirector()
        handlers = []
        for meths, handler_order in [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
            ]:
            class MockHandlerSubclass(MockHandler):
                pass
            h = MockHandlerSubclass(meths)
            h.handler_order = handler_order
            handlers.append(h)
            o.add_handler(h)

        o.open("http://example.com/")
        # handlers called in reverse order, thanks to their sort order
        self.assertEqual(o.calls[0][0], handlers[1])
        self.assertEqual(o.calls[1][0], handlers[0])

    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib.error.URLError, o.open, req)
        self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])

    # TODO: test_error -- OpenerDirector.error() doesn't actually seem to
    # be used in the standard library, but should really be tested anyway...

    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            # Compares equal to anything; stands in for the response object
            # whose identity the test does not care about.
            def __eq__(self, other):
                return True

        req = Request("http://example.com/")
        o.open(req)
        # A real assertion rather than a bare assert, which -O strips.
        self.assertEqual(len(o.calls), 2)
        calls = [(handlers[0], "http_open", (req,)),
                 (handlers[2], "http_error_302",
                  (req, Unknown(), 302, "", {}))]
        for expected, got in zip(calls, o.calls):
            handler, method_name, args = expected
            self.assertEqual((handler, method_name), got[:2])
            self.assertEqual(args, got[2])

    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
            ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler that handles the request
        calls = [
            (handlers[0], "http_request"), (handlers[1], "http_request"),
            (handlers[0], "http_response"), (handlers[1], "http_response")]

        # Make sure every expected call happened before indexing into calls.
        self.assertEqual(len(o.calls), len(calls))
        for i, (handler, name, args, kwds) in enumerate(o.calls):
            self.assertEqual((handler, name), calls[i])
            if i < 2:
                # *_request
                self.assertEqual(len(args), 1)
                self.assertTrue(isinstance(args[0], Request))
            else:
                # *_response
                self.assertEqual(len(args), 2)
                self.assertTrue(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assertTrue(args[1] is None or
                                isinstance(args[1], MockResponse))
594
595
def sanepathname2url(path):
    """Convert a filesystem path to a URL path.

    Wraps urllib.request.pathname2url(); on Windows (os.name == "nt") a
    result starting with "///" has its first two slashes removed.
    """
    converted = urllib.request.pathname2url(path)
    if os.name != "nt" or not converted.startswith("///"):
        # XXX don't ask me about the mac...
        return converted
    return converted[2:]
602
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000603class HandlerTests(unittest.TestCase):
604
def test_ftp(self):
    # Check what ftp_open() passes to connect_ftp() and retrfile(), and the
    # headers of the response it returns.
    class MockFTPWrapper:
        def __init__(self, data):
            self.data = data

        def retrfile(self, filename, filetype):
            self.filename = filename
            self.filetype = filetype
            return io.StringIO(self.data), len(self.data)

    class NullFTPHandler(urllib.request.FTPHandler):
        def __init__(self, data):
            self.data = data

        def connect_ftp(self, user, passwd, host, port, dirs,
                        timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            # Record the connection parameters instead of connecting.
            self.user = user
            self.passwd = passwd
            self.host = host
            self.port = port
            self.dirs = dirs
            self.ftpwrapper = MockFTPWrapper(self.data)
            return self.ftpwrapper

    import ftplib
    data = "rheum rhaponicum"
    h = NullFTPHandler(data)
    o = h.parent = MockOpener()

    # (url, host, port, type, dirs, filename, mimetype)
    cases = [
        ("ftp://localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://localhost:80/foo/bar/",
         "localhost", 80, "D",
         ["foo", "bar"], "", None),
        ("ftp://localhost/baz.gif;type=a",
         "localhost", ftplib.FTP_PORT, "A",
         [], "baz.gif", None),  # XXX really this should guess image/gif
        ]
    for url, host, port, type_, dirs, filename, mimetype in cases:
        req = Request(url)
        req.timeout = None
        r = h.ftp_open(req)
        # ftp authentication not yet implemented by FTPHandler
        self.assertTrue(h.user == h.passwd == "")
        self.assertEqual(h.host, socket.gethostbyname(host))
        self.assertEqual(h.port, port)
        self.assertEqual(h.dirs, dirs)
        wrapper = h.ftpwrapper
        self.assertEqual(wrapper.filename, filename)
        self.assertEqual(wrapper.filetype, type_)
        headers = r.info()
        self.assertEqual(headers.get("Content-type"), mimetype)
        self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000651
def test_file(self):
    # FileHandler.file_open(): local URLs that must open, URLs that must
    # be rejected, and URLs that are handed on as ftp.
    import email.utils
    import socket

    h = urllib.request.FileHandler()
    o = h.parent = MockOpener()

    TESTFN = support.TESTFN
    urlpath = sanepathname2url(os.path.abspath(TESTFN))
    towrite = b"hello, world\n"
    urls = [
        "file://localhost%s" % urlpath,
        "file://%s" % urlpath,
        "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
        ]
    # Also try this machine's own address, when it can be resolved.
    try:
        localaddr = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        localaddr = ''
    if localaddr:
        urls.append("file://%s%s" % (localaddr, urlpath))

    for url in urls:
        f = open(TESTFN, "wb")
        try:
            with f:
                f.write(towrite)

            r = h.file_open(Request(url))
            try:
                data = r.read()
                headers = r.info()
                r.geturl()
            finally:
                r.close()
            mtime = os.stat(TESTFN).st_mtime
            modified = email.utils.formatdate(mtime, usegmt=True)
        finally:
            os.remove(TESTFN)
        self.assertEqual(data, towrite)
        self.assertEqual(headers["Content-type"], "text/plain")
        self.assertEqual(headers["Content-length"], "13")
        self.assertEqual(headers["Last-modified"], modified)

    # Each of these must be refused with URLError.
    bad_urls = [
        "file://localhost:80%s" % urlpath,
        "file:///file_does_not_exist.txt",
        "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                               os.getcwd(), TESTFN),
        "file://somerandomhost.ontheinternet.com%s/%s" %
        (os.getcwd(), TESTFN),
        ]
    for url in bad_urls:
        try:
            with open(TESTFN, "wb") as f:
                f.write(towrite)

            self.assertRaises(urllib.error.URLError,
                              h.file_open, Request(url))
        finally:
            os.remove(TESTFN)

    h = urllib.request.FileHandler()
    o = h.parent = MockOpener()
    # XXXX why does // mean ftp (and /// mean not ftp!), and where
    #  is file: scheme specified?  I think this is really a bug, and
    #  what was intended was to distinguish between URLs like:
    # file:/blah.txt (a file)
    # file://localhost/blah.txt (a file)
    # file:///blah.txt (a file)
    # file://ftp.example.com/blah.txt (an ftp URL)
    ftp_cases = [
        ("file://ftp.example.com//foo.txt", True),
        ("file://ftp.example.com///foo.txt", False),
        # XXXX bug: fails with OSError, should be URLError
        ("file://ftp.example.com/foo.txt", False),
        ]
    for url, ftp in ftp_cases:
        req = Request(url)
        try:
            h.file_open(req)
        # XXXX remove OSError when bug fixed
        except (urllib.error.URLError, OSError):
            self.assertFalse(ftp)
        else:
            self.assertIs(o.req, req)
            self.assertEqual(req.type, "ftp")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000740
def test_http(self):
    # Exercises AbstractHTTPHandler in two phases: do_open() against a mock
    # connection (result attributes, request line, headers, error mapping),
    # then do_request_() to check which standard headers get added.
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", "blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes (plain attribute access proves they exist)
        r.read; r.readline  # wrapped MockFile methods
        r.info; r.geturl  # addinfourl methods
        # added from MockHTTPClass.getreply()
        # NOTE: this used to be the bare expression
        #     r.code, r.msg == 200, "OK"
        # which builds a throwaway tuple and verifies nothing.
        self.assertEqual(r.code, 200)
        self.assertEqual(r.msg, "OK")
        hdrs = r.info()
        hdrs.get; hdrs.__contains__  # r.info() gives dict from .getreply()
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check socket.error converted to URLError
    # (reuses the connection and request left over from the last iteration)
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in "", None:  # POST, GET
        req = Request("http://example.com/", data)
        r = MockResponse(200, "OK", {}, "")
        h.do_request_(req)
        if data is None:  # GET
            self.assertTrue("Content-length" not in req.unredirected_hdrs)
            self.assertTrue("Content-type" not in req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000802
def test_http_doubleslash(self):
    # An unnecessary double slash anywhere in the URL path must not break
    # anything.  Previously, a double slash directly after the host could
    # cause incorrect parsing.
    handler = urllib.request.AbstractHTTPHandler()
    handler.parent = MockOpener()

    payload = ""
    candidate_urls = (
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    )

    for candidate in candidate_urls:
        request = Request(candidate, payload)

        # Host must be determined correctly when there is no proxy ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and also once a proxy has been set on the same request.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
829
830
def test_errors(self):
    # HTTPErrorProcessor must hand 2xx responses back untouched and pass
    # every other status on to the parent opener's error handling.
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)

    # all 2xx are passed through
    passthrough = [(200, "OK"), (202, "Accepted"), (206, "Partial content")]
    for status, reason in passthrough:
        response = MockResponse(status, reason, {}, "", url)
        returned = processor.http_response(req, response)
        self.assertIs(response, returned)
        # opener.error not called
        self.assertFalse(hasattr(opener, "proto"))

    # anything else calls opener.error (and MockOpener returns None, here)
    bad_gateway = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(req, bad_gateway))
    self.assertEqual(opener.proto, "http")  # opener.error called
    self.assertEqual(opener.args,
                     (req, bad_gateway, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000855
def test_cookies(self):
    # HTTPCookieProcessor must show the outgoing request and the incoming
    # response to its cookie jar and return both objects unchanged.  The
    # mock jar exposes what it was given as ach_req, ec_req and ec_r.
    cj = MockCookieJar()
    h = urllib.request.HTTPCookieProcessor(cj)
    h.parent = MockOpener()

    req = Request("http://example.com/")
    r = MockResponse(200, "OK", {}, "")
    newreq = h.http_request(req)
    self.assertTrue(cj.ach_req is req is newreq)
    # assertEqual, not the deprecated alias assertEquals, to match the
    # rest of this file.
    self.assertEqual(req.get_origin_req_host(), "example.com")
    self.assertTrue(not req.is_unverifiable())
    newr = h.http_response(req, r)
    self.assertTrue(cj.ec_req is req)
    self.assertTrue(cj.ec_r is r is newr)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000870
def test_redirect(self):
    # Calls HTTPRedirectHandler's http_error_NNN methods directly; the
    # follow-up request is inspected through the parent MockOpener's .req.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    handler = urllib.request.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()

    # ordinary redirect behaviour
    for code in (301, 302, 303, 307):
        for data in (None, "blah\nblah\n"):
            error_method = getattr(handler, "http_error_%s" % code)
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                error_method(req, MockFile(), code, "Blah",
                             MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            self.assertEqual(opener.req.get_full_url(), to_url)
            try:
                self.assertEqual(opener.req.get_method(), "GET")
            except AttributeError:
                self.assertFalse(opener.req.has_data())

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            lowered = [name.lower() for name in opener.req.headers]
            self.assertTrue("content-length" not in lowered)
            self.assertTrue("content-type" not in lowered)

            self.assertEqual(opener.req.headers["Nonsense"],
                             "viking=withhold")
            self.assertTrue("Spam" not in opener.req.headers)
            self.assertTrue("Spam" not in opener.req.unredirected_hdrs)

    # loop detection
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    def redirect(redirector, request, url=to_url):
        redirector.http_error_302(request, MockFile(), 302, "Blah",
                                  MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/")
            count += 1
    except urllib.error.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/%d" % count)
            count += 1
    except urllib.error.HTTPError:
        self.assertEqual(
            count, urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000942
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")
    # The mock HTTP handler answers with a 302 pointing at another host.
    redirecting = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")
    self.assertFalse(redirecting.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000957
def test_proxy(self):
    # With an http proxy configured, opening a request must rewrite its
    # host to the proxy and then dispatch to the http_open handler.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        {"http": "proxy.example.com:3128"})
    opener.add_handler(proxy_handler)
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    req = Request("http://acme.example.com/")
    self.assertEqual(req.get_host(), "acme.example.com")
    opener.open(req)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
974
Senthil Kumaran11301632009-10-11 06:07:46 +0000975 def test_proxy_no_proxy(self):
976 os.environ['no_proxy'] = 'python.org'
977 o = OpenerDirector()
978 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
979 o.add_handler(ph)
980 req = Request("http://www.perl.org/")
981 self.assertEqual(req.get_host(), "www.perl.org")
982 r = o.open(req)
983 self.assertEqual(req.get_host(), "proxy.example.com")
984 req = Request("http://www.python.org")
985 self.assertEqual(req.get_host(), "www.python.org")
986 r = o.open(req)
987 self.assertEqual(req.get_host(), "www.python.org")
988 del os.environ['no_proxy']
989
990
def test_proxy_https(self):
    # Same as the plain http proxy case, but for the https scheme: the
    # request host becomes the proxy and https_open is what gets called.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        {"https": "proxy.example.com:3128"})
    opener.add_handler(proxy_handler)
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    req = Request("https://www.example.com/")
    self.assertEqual(req.get_host(), "www.example.com")
    opener.open(req)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    recorded = [call[0:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1006
def test_proxy_https_proxy_authorization(self):
    # For https through a proxy, Proxy-Authorization must stay on the
    # request object and must not appear among the headers sent on the
    # mock connection, while ordinary headers are still sent.
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler({"https": 'proxy.example.com:3128'}))
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    req = Request("https://www.example.com/")
    req.add_header("Proxy-Authorization", "FooBar")
    req.add_header("User-Agent", "Grail")
    self.assertEqual(req.get_host(), "www.example.com")
    self.assertIsNone(req._tunnel_host)
    opener.open(req)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    proxy_auth = ("Proxy-Authorization", "FooBar")
    user_agent = ("User-Agent", "Grail")
    self.assertFalse(proxy_auth in https_handler.httpconn.req_headers)
    self.assertTrue(user_agent in https_handler.httpconn.req_headers)
    self.assertIsNotNone(req._tunnel_host)
    self.assertEqual(req.get_host(), "proxy.example.com:3128")
    self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
Senthil Kumaran0ac1f832009-07-26 12:39:47 +00001029
def test_basic_auth(self, quote_char='"'):
    # Basic auth against a mock server that answers 401 with a Basic
    # challenge; quote_char selects how the realm is quoted in it.
    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
1045
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic auth scenario with the realm wrapped in single
    # quotes instead of the default double quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1048
def test_proxy_basic_auth(self):
    # Proxy flavour of the basic auth scenario: the mock server answers
    # 407 with a Proxy-Authenticate challenge and the credentials are
    # expected in the Proxy-authorization header.
    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler({"http": "proxy.example.com:3128"}))
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(407, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128")
1065
def test_basic_and_digest_auth_handlers(self):
    # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
    # response (http://python.org/sf/1479302), where it should instead
    # return None to allow another handler (especially
    # HTTPBasicAuthHandler) to handle the response.

    # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
    # try digest first (since it's the strongest auth scheme), so we record
    # order of calls here to check digest comes first:
    class RecordingOpenerDirector(OpenerDirector):
        def __init__(self):
            OpenerDirector.__init__(self)
            self.recorded = []

        def record(self, info):
            self.recorded.append(info)

    class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
        def http_error_401(self, *args, **kwds):
            # Log the call, then defer to the real implementation.
            self.parent.record("digest")
            urllib.request.HTTPDigestAuthHandler.http_error_401(
                self, *args, **kwds)

    class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
        def http_error_401(self, *args, **kwds):
            # Log the call, then defer to the real implementation.
            self.parent.record("basic")
            urllib.request.HTTPBasicAuthHandler.http_error_401(
                self, *args, **kwds)

    opener = RecordingOpenerDirector()
    password_manager = MockPasswordManager()
    digest_handler = TestDigestAuthHandler(password_manager)
    basic_handler = TestBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    challenge = 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(401, challenge)
    # Basic is registered before digest on purpose; the recorded call
    # order below must still show digest first.
    for handler in (basic_handler, digest_handler, http_handler):
        opener.add_handler(handler)

    # check basic auth isn't blocked by digest handler failing
    protected = "http://acme.example.com/protected"
    self._test_basic_auth(opener, basic_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected, protected)
    # check digest was tried before basic (twice, because
    # _test_basic_auth called .open() twice)
    self.assertEqual(opener.recorded, ["digest", "basic"] * 2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001112
def _test_basic_auth(self, opener, auth_handler, auth_header,
                     realm, http_handler, password_manager,
                     request_url, protected_url):
    # Shared scenario for the basic auth tests: register credentials,
    # open request_url, and check that the retry carries auth_header;
    # then repeat with no password available and expect no retry.
    import base64
    user = "wile"
    password = "coyote"

    # .add_password() fed through to password manager
    auth_handler.add_password(realm, request_url, user, password)
    self.assertEqual(realm, password_manager.realm)
    self.assertEqual(request_url, password_manager.url)
    self.assertEqual(user, password_manager.user)
    self.assertEqual(password, password_manager.password)

    opener.open(request_url)

    # should have asked the password manager for the username/password
    self.assertEqual(password_manager.target_realm, realm)
    self.assertEqual(password_manager.target_url, protected_url)

    # expect one request without authorization, then one with
    self.assertEqual(len(http_handler.requests), 2)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
    credentials = bytes('%s:%s' % (user, password), "ascii")
    encoded = base64.encodebytes(credentials).strip().decode()
    expected_header = 'Basic ' + encoded
    self.assertEqual(http_handler.requests[1].get_header(auth_header),
                     expected_header)

    # if the password manager can't find a password, the handler won't
    # handle the HTTP auth error
    password_manager.user = password_manager.password = None
    http_handler.reset()
    opener.open(request_url)
    self.assertEqual(len(http_handler.requests), 1)
    self.assertFalse(http_handler.requests[0].has_header(auth_header))
1148
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001149
class MiscTests(unittest.TestCase):

    def test_build_opener(self):
        # build_opener() must install every handler it is given, whether
        # passed as a class or as an instance.
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib.request.build_opener

        opener = build_opener(FooHandler, BarHandler)
        for expected in (FooHandler, BarHandler):
            self.opener_has_handler(opener, expected)

        # can take a mix of classes and instances
        opener = build_opener(FooHandler, BarHandler())
        for expected in (FooHandler, BarHandler):
            self.opener_has_handler(opener, expected)

        # subclasses of default handlers override default handlers
        opener = build_opener(MyHTTPHandler)
        self.opener_has_handler(opener, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        opener = build_opener()
        self.opener_has_handler(opener, urllib.request.HTTPHandler)
        opener = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(opener, urllib.request.HTTPHandler)
        opener = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(opener, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass

        opener = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        for expected in (MyHTTPHandler, MyOtherHTTPHandler):
            self.opener_has_handler(opener, expected)

    def opener_has_handler(self, opener, handler_class):
        # Fail unless some installed handler is an instance of exactly
        # handler_class (subclasses do not count).
        matched = any(installed.__class__ == handler_class
                      for installed in opener.handlers)
        self.assertTrue(matched)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001195
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001196class RequestTests(unittest.TestCase):
1197
1198 def setUp(self):
1199 self.get = Request("http://www.python.org/~jeremy/")
1200 self.post = Request("http://www.python.org/~jeremy/",
1201 "data",
1202 headers={"X-Test": "test"})
1203
1204 def test_method(self):
1205 self.assertEqual("POST", self.post.get_method())
1206 self.assertEqual("GET", self.get.get_method())
1207
1208 def test_add_data(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001209 self.assertTrue(not self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001210 self.assertEqual("GET", self.get.get_method())
1211 self.get.add_data("spam")
Georg Brandlab91fde2009-08-13 08:51:18 +00001212 self.assertTrue(self.get.has_data())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001213 self.assertEqual("POST", self.get.get_method())
1214
1215 def test_get_full_url(self):
1216 self.assertEqual("http://www.python.org/~jeremy/",
1217 self.get.get_full_url())
1218
1219 def test_selector(self):
1220 self.assertEqual("/~jeremy/", self.get.get_selector())
1221 req = Request("http://www.python.org/")
1222 self.assertEqual("/", req.get_selector())
1223
1224 def test_get_type(self):
1225 self.assertEqual("http", self.get.get_type())
1226
1227 def test_get_host(self):
1228 self.assertEqual("www.python.org", self.get.get_host())
1229
1230 def test_get_host_unquote(self):
1231 req = Request("http://www.%70ython.org/")
1232 self.assertEqual("www.python.org", req.get_host())
1233
1234 def test_proxy(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001235 self.assertTrue(not self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001236 self.get.set_proxy("www.perl.org", "http")
Georg Brandlab91fde2009-08-13 08:51:18 +00001237 self.assertTrue(self.get.has_proxy())
Benjamin Peterson6ebe78f2008-12-21 00:06:59 +00001238 self.assertEqual("www.python.org", self.get.get_origin_req_host())
1239 self.assertEqual("www.perl.org", self.get.get_host())
1240
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001241
def test_main(verbose=None):
    # Run the doctests of this test module and of urllib.request, then
    # all unittest classes defined in this file.
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    test_classes = [
        TrivialTests,
        OpenerDirectorTests,
        HandlerTests,
        MiscTests,
        RequestTests,
    ]
    support.run_unittest(*test_classes)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001252
# Script entry point: run the whole suite verbosely.
if "__main__" == __name__:
    test_main(verbose=True)