blob: f296aa49e65d839e2e7faf065af705763de77149 [file] [log] [blame]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00003
Christian Heimes05e8be12008-02-23 18:30:17 +00004import os
Guido van Rossum34d19282007-08-09 01:03:29 +00005import io
Georg Brandlf78e02b2008-06-10 17:40:04 +00006import socket
Senthil Kumaran7bc0d872010-12-19 10:49:52 +00007import array
Senthil Kumaran4de00a22011-05-11 21:17:57 +08008import sys
Jeremy Hyltone3e61042001-05-09 15:50:25 +00009
Jeremy Hylton1afc1692008-06-18 20:49:58 +000010import urllib.request
Ronald Oussorene72e1612011-03-14 18:15:25 -040011# The proxy bypass method imported below has logic specific to the OSX
12# proxy config data structure but is testable on all platforms.
13from urllib.request import Request, OpenerDirector, _proxy_bypass_macosx_sysconf
guido@google.coma119df92011-03-29 11:41:02 -070014import urllib.error
Jeremy Hyltone3e61042001-05-09 15:50:25 +000015
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000016# XXX
17# Request
18# CacheFTPHandler (hard to write)
Thomas Wouters477c8d52006-05-27 19:21:47 +000019# parse_keqv_list, parse_http_list, HTTPDigestAuthHandler
Jeremy Hyltone3e61042001-05-09 15:50:25 +000020
class TrivialTests(unittest.TestCase):
    """Smoke tests for the public surface of the urllib package."""

    def test___all__(self):
        # Verify which names are exposed
        for module in 'request', 'response', 'parse', 'error', 'robotparser':
            context = {}
            exec('from urllib.%s import *' % module, context)
            # exec() injects __builtins__ into the namespace; it is not
            # something the star-import exposed.
            del context['__builtins__']
            if module == 'request' and os.name == 'nt':
                # On Windows these two names are expected to report
                # nturl2path as their defining module.
                u, p = context.pop('url2pathname'), context.pop('pathname2url')
                self.assertEqual(u.__module__, 'nturl2path')
                self.assertEqual(p.__module__, 'nturl2path')
            for k, v in context.items():
                self.assertEqual(v.__module__, 'urllib.%s' % module,
                    "%r is exposed in 'urllib.%s' but defined in %r" %
                    (k, module, v.__module__))

    def test_trivial(self):
        # A couple trivial tests

        self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')

        # XXX Name hacking to get this to work on Windows.
        fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')

        if os.name == 'nt':
            file_url = "file:///%s" % fname
        else:
            file_url = "file://%s" % fname

        f = urllib.request.urlopen(file_url)
        try:
            # Only checks that the file: URL can be opened and read; the
            # content itself is not inspected.
            f.read()
        finally:
            # Close the response even when read() raises.
            f.close()

    def test_parse_http_list(self):
        # Each entry pairs an input string with the list of elements that
        # parse_http_list() is expected to produce for it.
        tests = [
            ('a,b,c', ['a', 'b', 'c']),
            ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
            ('a, b, "c", "d", "e,f", g, h',
             ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
            ('a="b\\"c", d="e\\,f", g="h\\\\i"',
             ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
        for string, expected in tests:
            self.assertEqual(urllib.request.parse_http_list(string), expected)
Georg Brandle1b13d22005-08-24 22:20:32 +000066
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +000067
def test_request_headers_dict():
    """
    Doctest carrier: Request.headers stores names in .capitalize() case.

    The Request.headers dictionary is not a documented interface.  It should
    stay that way, because the complete set of headers is only accessible
    through the .get_header(), .has_header(), .header_items() interface.
    However, .headers pre-dates those methods, and so real code will be using
    the dictionary.

    The introduction in 2.4 of those methods was a mistake for the same reason:
    code that previously saw all (urllib2 user)-provided headers in .headers
    now sees only a subset (and the function interface is ugly and incomplete).
    A better change would have been to replace .headers dict with a dict
    subclass (or UserDict.DictMixin instance?) that preserved the .headers
    interface and also provided access to the "unredirected" headers.  It's
    probably too late to fix that, though.


    Check .capitalize() case normalization:

    >>> url = "http://example.com"
    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
    'blah'
    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
    'blah'

    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
    but that could be changed in future.

    """
97
def test_request_headers_methods():
    """
    Doctest carrier for the Request header accessor methods.

    Note the case normalization of header names here, to .capitalize()-case.
    This should be preserved for backwards-compatibility.  (In the HTTP case,
    normalization to .title()-case is done by urllib2 before sending headers to
    http.client).

    >>> url = "http://example.com"
    >>> r = Request(url, headers={"Spam-eggs": "blah"})
    >>> r.has_header("Spam-eggs")
    True
    >>> r.header_items()
    [('Spam-eggs', 'blah')]
    >>> r.add_header("Foo-Bar", "baz")
    >>> items = sorted(r.header_items())
    >>> items
    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]

    Note that e.g. r.has_header("spam-EggS") is currently False, and
    r.get_header("spam-EggS") returns None, but that could be changed in
    future.

    >>> r.has_header("Not-there")
    False
    >>> print(r.get_header("Not-there"))
    None
    >>> r.get_header("Not-there", "default")
    'default'

    Method r.remove_header should remove items both from r.headers and
    r.unredirected_hdrs dictionaries

    >>> r.remove_header("Spam-eggs")
    >>> r.has_header("Spam-eggs")
    False
    >>> r.add_unredirected_header("Unredirected-spam", "Eggs")
    >>> r.has_header("Unredirected-spam")
    True
    >>> r.remove_header("Unredirected-spam")
    >>> r.has_header("Unredirected-spam")
    False

    """
141
142
def test_password_manager(self):
    """
    Doctest carrier: HTTPPasswordMgr lookups by realm and URI.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password
    >>> add("Some Realm", "http://example.com/", "joe", "password")
    >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
    >>> add("c", "http://example.com/foo", "foo", "ni")
    >>> add("c", "http://example.com/bar", "bar", "nini")
    >>> add("b", "http://example.com/", "first", "blah")
    >>> add("b", "http://example.com/", "second", "spam")
    >>> add("a", "http://example.com", "1", "a")
    >>> add("Some Realm", "http://c.example.com:3128", "3", "c")
    >>> add("Some Realm", "d.example.com", "4", "d")
    >>> add("Some Realm", "e.example.com:3128", "5", "e")

    >>> mgr.find_user_password("Some Realm", "example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("Some Realm", "http://example.com/spam/spam")
    ('joe', 'password')
    >>> mgr.find_user_password("c", "http://example.com/foo")
    ('foo', 'ni')
    >>> mgr.find_user_password("c", "http://example.com/bar")
    ('bar', 'nini')

    Actually, this is really undefined ATM
##     Currently, we use the highest-level path where more than one match:

##     >>> mgr.find_user_password("Some Realm", "http://example.com/ni")
##     ('joe', 'password')

    Use latest add_password() in case of conflict:

    >>> mgr.find_user_password("b", "http://example.com/")
    ('second', 'spam')

    No special relationship between a.example.com and example.com:

    >>> mgr.find_user_password("a", "http://example.com/")
    ('1', 'a')
    >>> mgr.find_user_password("a", "http://a.example.com/")
    (None, None)

    Ports:

    >>> mgr.find_user_password("Some Realm", "c.example.com")
    (None, None)
    >>> mgr.find_user_password("Some Realm", "c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "http://c.example.com:3128")
    ('3', 'c')
    >>> mgr.find_user_password("Some Realm", "d.example.com")
    ('4', 'd')
    >>> mgr.find_user_password("Some Realm", "e.example.com:3128")
    ('5', 'e')

    """
    # The body is intentionally empty: only the docstring examples matter.
    pass
206
207
def test_password_manager_default_port(self):
    """
    Doctest carrier: HTTPPasswordMgr handling of explicit and default ports.

    >>> mgr = urllib.request.HTTPPasswordMgr()
    >>> add = mgr.add_password

    The point to note here is that we can't guess the default port if there's
    no scheme.  This applies to both add_password and find_user_password.

    >>> add("f", "http://g.example.com:80", "10", "j")
    >>> add("g", "http://h.example.com", "11", "k")
    >>> add("h", "i.example.com:80", "12", "l")
    >>> add("i", "j.example.com", "13", "m")
    >>> mgr.find_user_password("f", "g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "g.example.com")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:100")
    (None, None)
    >>> mgr.find_user_password("f", "http://g.example.com:80")
    ('10', 'j')
    >>> mgr.find_user_password("f", "http://g.example.com")
    ('10', 'j')
    >>> mgr.find_user_password("g", "h.example.com")
    ('11', 'k')
    >>> mgr.find_user_password("g", "h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("g", "http://h.example.com:80")
    ('11', 'k')
    >>> mgr.find_user_password("h", "i.example.com")
    (None, None)
    >>> mgr.find_user_password("h", "i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("h", "http://i.example.com:80")
    ('12', 'l')
    >>> mgr.find_user_password("i", "j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "j.example.com:80")
    (None, None)
    >>> mgr.find_user_password("i", "http://j.example.com")
    ('13', 'm')
    >>> mgr.find_user_password("i", "http://j.example.com:80")
    (None, None)

    """
254
class MockOpener:
    """Opener stand-in that only records the arguments it was called with."""

    addheaders = []

    def open(self, req, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Remember the last open() call so a test can inspect it afterwards.
        self.req = req
        self.data = data
        self.timeout = timeout

    def error(self, proto, *args):
        # Remember the protocol and the remaining positional arguments.
        self.proto = proto
        self.args = args
261
class MockFile:
    """File-like stub whose read(), readline() and close() all return None."""

    def read(self, count=None):
        return None

    def readline(self, count=None):
        return None

    def close(self):
        return None
266
class MockHeaders(dict):
    """Dict of headers with a getheaders() method for code that expects one."""

    def getheaders(self, name):
        # The requested name is ignored: every stored value is returned.
        return [value for value in self.values()]
Martin v. Löwis2a6ba902004-05-31 18:22:40 +0000270
class MockResponse(io.StringIO):
    """In-memory text response carrying a status code, message, headers and URL."""

    def __init__(self, code, msg, headers, data, url=None):
        # The body text becomes the initial content of the StringIO buffer.
        super().__init__(data)
        self.code = code
        self.msg = msg
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor (None by default)."""
        return self.url
279
class MockCookieJar:
    """Cookie jar stand-in that records the objects passed to its two hooks."""

    def add_cookie_header(self, request):
        # Keep the request so the test can check what was passed in.
        self.ach_req = request

    def extract_cookies(self, response, request):
        # Keep both arguments so the test can check what was passed in.
        self.ec_req = request
        self.ec_r = response
285
class FakeMethod:
    """Callable that forwards each call to a shared handle function.

    The handle is invoked as handle(meth_name, action, *args), so one
    function can implement many differently named mock methods.
    """

    def __init__(self, meth_name, action, handle):
        self.meth_name = meth_name
        self.handle = handle
        self.action = action

    def __call__(self, *args):
        dispatch = self.handle
        return dispatch(self.meth_name, self.action, *args)
293
class MockHTTPResponse(io.IOBase):
    """Minimal response object returned by MockHTTPClass.getresponse()."""

    def __init__(self, fp, msg, status, reason):
        self.fp, self.msg = fp, msg
        self.status, self.reason = status, reason
        # The code attribute is fixed at 200 regardless of the status argument.
        self.code = 200

    def read(self):
        """Return an empty body."""
        return ''

    def info(self):
        """Return a new empty headers mapping."""
        return {}

    def geturl(self):
        """Return self.url, which must have been assigned by the caller."""
        return self.url
310
311
class MockHTTPClass:
    """Stand-in for an HTTP connection class that records what it is asked.

    The instance is itself callable and returns itself, so one object can
    serve both as the connection factory and as the connection it creates.
    """

    def __init__(self):
        self.level = 0
        self.req_headers = []
        self.data = None
        # When true, request() raises OSError after recording its arguments.
        self.raise_on_endheaders = False
        self.sock = None
        self._tunnel_headers = {}

    def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Record the connection parameters and hand back this same object.
        self.host = host
        self.timeout = timeout
        return self

    def set_debuglevel(self, level):
        self.level = level

    def set_tunnel(self, host, port=None, headers=None):
        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            self._tunnel_headers.clear()

    def request(self, method, url, body=None, headers=None):
        """Record the request; raise OSError if raise_on_endheaders is set."""
        self.method = method
        self.selector = url
        if headers is not None:
            self.req_headers += headers.items()
        # Headers accumulate across calls and are kept sorted.
        self.req_headers.sort()
        if body:
            self.data = body
        if self.raise_on_endheaders:
            raise OSError()

    def getresponse(self):
        return MockHTTPResponse(MockFile(), {}, 200, "OK")

    def close(self):
        pass
353
class MockHandler:
    # useful for testing handler machinery
    # see add_ordered_mock_handlers() docstring
    handler_order = 500

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        """Install one FakeMethod per entry of *methods*.

        An entry is either a method name (its action is None) or a
        (name, action) pair.  The kind of entry is decided by type, not by
        length, so a two-character method name is not mistaken for a pair.
        """
        for spec in methods:
            if isinstance(spec, str):
                name, action = spec, None
            else:
                name, action = spec
            meth = FakeMethod(name, action, self.handle)
            # Installed on the class rather than the instance; presumably
            # this is why callers create a fresh subclass per handler.
            setattr(self.__class__, name, meth)

    def handle(self, fn_name, action, *args, **kwds):
        """Record the call on the parent, then perform *action*.

        Raises urllib.error.URLError for the "raise" action and
        AssertionError for an action that is not recognised.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if action is None:
            return None
        elif action == "return self":
            return self
        elif action == "return response":
            res = MockResponse(200, "OK", {}, "")
            return res
        elif action == "return request":
            return Request("http://blah/")
        elif action.startswith("error"):
            # The error code is whatever follows the last space; it stays a
            # string when it is not an integer.
            code = action[action.rfind(" ")+1:]
            try:
                code = int(code)
            except ValueError:
                pass
            res = MockResponse(200, "OK", {}, "")
            return self.parent.error("http", args[0], res, code, "", {})
        elif action == "raise":
            raise urllib.error.URLError("blah")
        # Raised explicitly so the check survives running under -O.
        raise AssertionError("unknown mock action: %r" % (action,))

    def close(self): pass

    def add_parent(self, parent):
        self.parent = parent
        # Every handler added resets the parent's call log.
        self.parent.calls = []

    def __lt__(self, other):
        if not hasattr(other, "handler_order"):
            # No handler_order, leave in original order.  Yuck.
            return True
        return self.handler_order < other.handler_order
397
def add_ordered_mock_handlers(opener, meth_spec):
    """Build one MockHandler per entry of meth_spec and register it on opener.

    meth_spec is a list of lists; each inner list describes the methods of
    one handler, as strings and/or tuples.  For example:

    [["http_error", "ftp_open"], ["http_open"]]

    gives the first handler .http_error() and .ftp_open() and the second
    handler .http_open().  A method named by a plain string records its
    arguments and returns None.  A (name, action) tuple makes the method
    perform that action as well (see MockHandler.handle()), for example:

    [["http_error"], [("http_open", "return request")]]

    gives one handler an .http_error() that returns None and another handler
    an .http_open() that returns a Request object.

    Each handler's handler_order is raised by its position in meth_spec.
    The handlers are returned in that same order.
    """
    created = []
    for position, meths in enumerate(meth_spec):
        # A fresh subclass per handler: MockHandler installs its fake
        # methods on the class, so handlers must not share one.
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(meths)
        handler.handler_order += position
        handler.add_parent(opener)
        created.append(handler)
        opener.add_handler(handler)
    return created
428
def build_test_opener(*handler_instances):
    """Return a new OpenerDirector with the given handlers added in order."""
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
434
class MockHTTPHandler(urllib.request.BaseHandler):
    # useful for testing redirections and auth
    # sends supplied headers and code as first response
    # sends 200 OK as second response

    def __init__(self, code, headers):
        self.code = code
        self.headers = headers
        self.reset()

    def reset(self):
        """Forget all recorded requests and start again at the first response."""
        self._count = 0
        self.requests = []

    def http_open(self, req):
        """Answer with the configured error first, then with 200 OK."""
        import copy
        import email
        import http.client
        # Store a deep copy of each request as it looked when received.
        self.requests.append(copy.deepcopy(req))
        if self._count == 0:
            self._count = self._count + 1
            name = http.client.responses[self.code]
            msg = email.message_from_string(self.headers)
            return self.parent.error(
                "http", req, MockFile(), self.code, name, msg)
        else:
            self.req = req
            msg = email.message_from_string("\r\n\r\n")
            return MockResponse(200, "OK", msg, "", req.get_full_url())
460
class MockHTTPSHandler(urllib.request.AbstractHTTPHandler):
    # Useful for testing the Proxy-Authorization request by verifying the
    # properties of httpcon

    def __init__(self):
        super().__init__()
        # All HTTPS requests go through this one mock connection object.
        self.httpconn = MockHTTPClass()

    def https_open(self, req):
        connection = self.httpconn
        return self.do_open(connection, req)
471
class MockPasswordManager:
    """Password manager stub that stores one credential and returns it for any lookup."""

    def add_password(self, realm, uri, user, password):
        # Only the most recently added credential is kept.
        self.realm, self.url = realm, uri
        self.user, self.password = user, password

    def find_user_password(self, realm, authuri):
        # Record what was asked for, then answer with the stored credential
        # whatever the realm or URI.
        self.target_realm, self.target_url = realm, authuri
        return self.user, self.password
482
483
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000484class OpenerDirectorTests(unittest.TestCase):
485
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000486 def test_add_non_handler(self):
487 class NonHandler(object):
488 pass
489 self.assertRaises(TypeError,
490 OpenerDirector().add_handler, NonHandler())
491
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000492 def test_badly_named_methods(self):
493 # test work-around for three methods that accidentally follow the
494 # naming conventions for handler methods
495 # (*_open() / *_request() / *_response())
496
497 # These used to call the accidentally-named methods, causing a
498 # TypeError in real code; here, returning self from these mock
499 # methods would either cause no exception, or AttributeError.
500
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000501 from urllib.error import URLError
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000502
503 o = OpenerDirector()
504 meth_spec = [
505 [("do_open", "return self"), ("proxy_open", "return self")],
506 [("redirect_request", "return self")],
507 ]
508 handlers = add_ordered_mock_handlers(o, meth_spec)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000509 o.add_handler(urllib.request.UnknownHandler())
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000510 for scheme in "do", "proxy", "redirect":
511 self.assertRaises(URLError, o.open, scheme+"://example.com/")
512
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000513 def test_handled(self):
514 # handler returning non-None means no more handlers will be called
515 o = OpenerDirector()
516 meth_spec = [
517 ["http_open", "ftp_open", "http_error_302"],
518 ["ftp_open"],
519 [("http_open", "return self")],
520 [("http_open", "return self")],
521 ]
522 handlers = add_ordered_mock_handlers(o, meth_spec)
523
524 req = Request("http://example.com/")
525 r = o.open(req)
526 # Second .http_open() gets called, third doesn't, since second returned
527 # non-None. Handlers without .http_open() never get any methods called
528 # on them.
529 # In fact, second mock handler defining .http_open() returns self
530 # (instead of response), which becomes the OpenerDirector's return
531 # value.
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000532 self.assertEqual(r, handlers[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000533 calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
534 for expected, got in zip(calls, o.calls):
535 handler, name, args, kwds = got
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000536 self.assertEqual((handler, name), expected)
537 self.assertEqual(args, (req,))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000538
539 def test_handler_order(self):
540 o = OpenerDirector()
541 handlers = []
542 for meths, handler_order in [
543 ([("http_open", "return self")], 500),
544 (["http_open"], 0),
545 ]:
546 class MockHandlerSubclass(MockHandler): pass
547 h = MockHandlerSubclass(meths)
548 h.handler_order = handler_order
549 handlers.append(h)
550 o.add_handler(h)
551
552 r = o.open("http://example.com/")
553 # handlers called in reverse order, thanks to their sort order
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000554 self.assertEqual(o.calls[0][0], handlers[1])
555 self.assertEqual(o.calls[1][0], handlers[0])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000556
557 def test_raise(self):
558 # raising URLError stops processing of request
559 o = OpenerDirector()
560 meth_spec = [
561 [("http_open", "raise")],
562 [("http_open", "return self")],
563 ]
564 handlers = add_ordered_mock_handlers(o, meth_spec)
565
566 req = Request("http://example.com/")
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000567 self.assertRaises(urllib.error.URLError, o.open, req)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000568 self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000569
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000570 def test_http_error(self):
571 # XXX http_error_default
572 # http errors are a special case
573 o = OpenerDirector()
574 meth_spec = [
575 [("http_open", "error 302")],
576 [("http_error_400", "raise"), "http_open"],
577 [("http_error_302", "return response"), "http_error_303",
578 "http_error"],
579 [("http_error_302")],
580 ]
581 handlers = add_ordered_mock_handlers(o, meth_spec)
582
583 class Unknown:
584 def __eq__(self, other): return True
585
586 req = Request("http://example.com/")
587 r = o.open(req)
588 assert len(o.calls) == 2
589 calls = [(handlers[0], "http_open", (req,)),
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000590 (handlers[2], "http_error_302",
591 (req, Unknown(), 302, "", {}))]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000592 for expected, got in zip(calls, o.calls):
593 handler, method_name, args = expected
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000594 self.assertEqual((handler, method_name), got[:2])
595 self.assertEqual(args, got[2])
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000596
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700597
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000598 def test_processors(self):
599 # *_request / *_response methods get called appropriately
600 o = OpenerDirector()
601 meth_spec = [
602 [("http_request", "return request"),
603 ("http_response", "return response")],
604 [("http_request", "return request"),
605 ("http_response", "return response")],
606 ]
607 handlers = add_ordered_mock_handlers(o, meth_spec)
608
609 req = Request("http://example.com/")
610 r = o.open(req)
611 # processor methods are called on *all* handlers that define them,
612 # not just the first handler that handles the request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000613 calls = [
614 (handlers[0], "http_request"), (handlers[1], "http_request"),
615 (handlers[0], "http_response"), (handlers[1], "http_response")]
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000616
617 for i, (handler, name, args, kwds) in enumerate(o.calls):
618 if i < 2:
619 # *_request
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000620 self.assertEqual((handler, name), calls[i])
621 self.assertEqual(len(args), 1)
Ezio Melottie9615932010-01-24 19:26:24 +0000622 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000623 else:
624 # *_response
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000625 self.assertEqual((handler, name), calls[i])
626 self.assertEqual(len(args), 2)
Ezio Melottie9615932010-01-24 19:26:24 +0000627 self.assertIsInstance(args[0], Request)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000628 # response from opener.open is None, because there's no
629 # handler that defines http_open to handle it
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000630 self.assertTrue(args[1] is None or
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000631 isinstance(args[1], MockResponse))
632
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700633 def test_method_deprecations(self):
634 req = Request("http://www.example.com")
Senthil Kumaran08bd4aa2012-04-11 23:05:49 +0800635
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800636 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700637 req.add_data("data")
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800638 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700639 req.get_data()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800640 with self.assertWarns(DeprecationWarning):
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -0700641 req.has_data()
642 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700643 req.get_host()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800644 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700645 req.get_selector()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800646 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700647 req.is_unverifiable()
Senthil Kumaran80a133b2012-04-12 19:28:07 +0800648 with self.assertWarns(DeprecationWarning):
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700649 req.get_origin_req_host()
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -0700650 with self.assertWarns(DeprecationWarning):
651 req.get_type()
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000652
Senthil Kumaran08bd4aa2012-04-11 23:05:49 +0800653
def sanepathname2url(path):
    """Convert a filesystem path to the path part of a file: URL.

    Raises unittest.SkipTest when the path cannot be encoded as UTF-8.
    On Windows, a result starting with "///" has its first two slashes
    removed.
    """
    try:
        path.encode("utf-8")
    except UnicodeEncodeError:
        raise unittest.SkipTest("path is not encodable to utf8")
    converted = urllib.request.pathname2url(path)
    has_extra_slashes = os.name == "nt" and converted.startswith("///")
    # XXX don't ask me about the mac...
    return converted[2:] if has_extra_slashes else converted
664
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000665class HandlerTests(unittest.TestCase):
666
667 def test_ftp(self):
668 class MockFTPWrapper:
669 def __init__(self, data): self.data = data
670 def retrfile(self, filename, filetype):
671 self.filename, self.filetype = filename, filetype
Guido van Rossum34d19282007-08-09 01:03:29 +0000672 return io.StringIO(self.data), len(self.data)
Nadeem Vawda08f5f7a2011-07-23 14:03:00 +0200673 def close(self): pass
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000674
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000675 class NullFTPHandler(urllib.request.FTPHandler):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000676 def __init__(self, data): self.data = data
Georg Brandlf78e02b2008-06-10 17:40:04 +0000677 def connect_ftp(self, user, passwd, host, port, dirs,
678 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000679 self.user, self.passwd = user, passwd
680 self.host, self.port = host, port
681 self.dirs = dirs
682 self.ftpwrapper = MockFTPWrapper(self.data)
683 return self.ftpwrapper
684
Georg Brandlf78e02b2008-06-10 17:40:04 +0000685 import ftplib
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000686 data = "rheum rhaponicum"
687 h = NullFTPHandler(data)
688 o = h.parent = MockOpener()
689
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000690 for url, host, port, user, passwd, type_, dirs, filename, mimetype in [
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000691 ("ftp://localhost/foo/bar/baz.html",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000692 "localhost", ftplib.FTP_PORT, "", "", "I",
693 ["foo", "bar"], "baz.html", "text/html"),
694 ("ftp://parrot@localhost/foo/bar/baz.html",
695 "localhost", ftplib.FTP_PORT, "parrot", "", "I",
696 ["foo", "bar"], "baz.html", "text/html"),
697 ("ftp://%25parrot@localhost/foo/bar/baz.html",
698 "localhost", ftplib.FTP_PORT, "%parrot", "", "I",
699 ["foo", "bar"], "baz.html", "text/html"),
700 ("ftp://%2542parrot@localhost/foo/bar/baz.html",
701 "localhost", ftplib.FTP_PORT, "%42parrot", "", "I",
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000702 ["foo", "bar"], "baz.html", "text/html"),
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000703 ("ftp://localhost:80/foo/bar/",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000704 "localhost", 80, "", "", "D",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000705 ["foo", "bar"], "", None),
706 ("ftp://localhost/baz.gif;type=a",
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000707 "localhost", ftplib.FTP_PORT, "", "", "A",
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000708 [], "baz.gif", None), # XXX really this should guess image/gif
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000709 ]:
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000710 req = Request(url)
711 req.timeout = None
712 r = h.ftp_open(req)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000713 # ftp authentication not yet implemented by FTPHandler
Senthil Kumarandaa29d02010-11-18 15:36:41 +0000714 self.assertEqual(h.user, user)
715 self.assertEqual(h.passwd, passwd)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000716 self.assertEqual(h.host, socket.gethostbyname(host))
717 self.assertEqual(h.port, port)
718 self.assertEqual(h.dirs, dirs)
719 self.assertEqual(h.ftpwrapper.filename, filename)
720 self.assertEqual(h.ftpwrapper.filetype, type_)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000721 headers = r.info()
Kurt B. Kaiser3f7cb5d2004-07-11 17:14:13 +0000722 self.assertEqual(headers.get("Content-type"), mimetype)
Jeremy Hyltondf38ea92003-12-17 20:42:38 +0000723 self.assertEqual(int(headers["Content-length"]), len(data))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000724
def test_file(self):
    # FileHandler.file_open() must serve a local file (body, headers and
    # URL), and must raise URLError for URLs that do not name a readable
    # local file.
    import email.utils
    import socket
    handler = urllib.request.FileHandler()
    opener = handler.parent = MockOpener()

    testfn = support.TESTFN
    urlpath = sanepathname2url(os.path.abspath(testfn))
    payload = b"hello, world\n"

    # Different spellings of a URL for the same local file.
    local_urls = [
        "file://localhost%s" % urlpath,
        "file://%s" % urlpath,
        "file://%s%s" % (socket.gethostbyname('localhost'), urlpath),
    ]
    try:
        localaddr = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        # The machine's own hostname does not resolve; skip that variant.
        localaddr = ''
    if localaddr:
        local_urls.append("file://%s%s" % (localaddr, urlpath))

    for url in local_urls:
        f = open(testfn, "wb")
        try:
            with f:
                f.write(payload)

            response = handler.file_open(Request(url))
            try:
                data = response.read()
                headers = response.info()
                respurl = response.geturl()
            finally:
                response.close()
            # Last-modified is expected to be the file's mtime in GMT.
            mtime = os.stat(testfn).st_mtime
            modified = email.utils.formatdate(mtime, usegmt=True)
        finally:
            os.remove(testfn)
        self.assertEqual(data, payload)
        self.assertEqual(headers["Content-type"], "text/plain")
        self.assertEqual(headers["Content-length"], "13")
        self.assertEqual(headers["Last-modified"], modified)
        self.assertEqual(respurl, url)

    # URLs with a port, a missing file, or a foreign host must be rejected.
    rejected_urls = [
        "file://localhost:80%s" % urlpath,
        "file:///file_does_not_exist.txt",
        "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                               os.getcwd(), testfn),
        "file://somerandomhost.ontheinternet.com%s/%s" %
        (os.getcwd(), testfn),
    ]
    for url in rejected_urls:
        try:
            with open(testfn, "wb") as f:
                f.write(payload)

            self.assertRaises(urllib.error.URLError,
                              handler.file_open, Request(url))
        finally:
            os.remove(testfn)

    handler = urllib.request.FileHandler()
    opener = handler.parent = MockOpener()
    # XXXX why does // mean ftp (and /// mean not ftp!), and where
    #  is file: scheme specified?  I think this is really a bug, and
    #  what was intended was to distinguish between URLs like:
    # file:/blah.txt (a file)
    # file://localhost/blah.txt (a file)
    # file:///blah.txt (a file)
    # file://ftp.example.com/blah.txt (an ftp URL)
    ftp_cases = [
        ("file://ftp.example.com//foo.txt", False),
        ("file://ftp.example.com///foo.txt", False),
        # XXXX bug: fails with OSError, should be URLError
        ("file://ftp.example.com/foo.txt", False),
        ("file://somehost//foo/something.txt", False),
        ("file://localhost//foo/something.txt", False),
    ]
    for url, ftp in ftp_cases:
        req = Request(url)
        try:
            handler.file_open(req)
        # XXXX remove OSError when bug fixed
        except (urllib.error.URLError, OSError):
            self.assertFalse(ftp)
        else:
            self.assertIs(opener.req, req)
            self.assertEqual(req.type, "ftp")
        self.assertEqual(req.type == "ftp", ftp)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000817
def test_http(self):
    # Exercise AbstractHTTPHandler: do_open() against a mock connection
    # class, then the header/body preparation done by do_request_() for
    # bytes, str, generator, file-object and array.array bodies.
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    url = "http://example.com/"
    for method, data in [("GET", None), ("POST", b"blah")]:
        req = Request(url, data, {"Foo": "bar"})
        req.timeout = None
        req.add_unredirected_header("Spam", "eggs")
        http = MockHTTPClass()
        r = h.do_open(http, req)

        # result attributes
        r.read; r.readline  # wrapped MockFile methods
        r.info; r.geturl  # addinfourl methods
        # Assert status and reason explicitly: a bare tuple expression such
        # as `r.code, r.msg == 200, "OK"` evaluates and checks nothing.
        self.assertEqual(r.code, 200)  # added from MockHTTPClass.getreply()
        self.assertEqual(r.msg, "OK")
        hdrs = r.info()
        hdrs.get; hdrs.__contains__  # r.info() gives dict from .getreply()
        self.assertEqual(r.geturl(), url)

        self.assertEqual(http.host, "example.com")
        self.assertEqual(http.level, 0)
        self.assertEqual(http.method, method)
        self.assertEqual(http.selector, "/")
        self.assertEqual(http.req_headers,
                         [("Connection", "close"),
                          ("Foo", "bar"), ("Spam", "eggs")])
        self.assertEqual(http.data, data)

    # check OSError converted to URLError
    http.raise_on_endheaders = True
    self.assertRaises(urllib.error.URLError, h.do_open, http, req)

    # Check for TypeError on POST data which is str.
    req = Request("http://example.com/", "badpost")
    self.assertRaises(TypeError, h.do_request_, req)

    # check adding of standard headers
    o.addheaders = [("Spam", "eggs")]
    for data in b"", None:  # POST, GET
        req = Request("http://example.com/", data)
        r = MockResponse(200, "OK", {}, "")
        newreq = h.do_request_(req)
        if data is None:  # GET
            self.assertNotIn("Content-length", req.unredirected_hdrs)
            self.assertNotIn("Content-type", req.unredirected_hdrs)
        else:  # POST
            self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
            self.assertEqual(req.unredirected_hdrs["Content-type"],
                             "application/x-www-form-urlencoded")
        # XXX the details of Host could be better tested
        self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
        self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")

        # don't clobber existing headers
        req.add_unredirected_header("Content-length", "foo")
        req.add_unredirected_header("Content-type", "bar")
        req.add_unredirected_header("Host", "baz")
        req.add_unredirected_header("Spam", "foo")
        newreq = h.do_request_(req)
        self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
        self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
        self.assertEqual(req.unredirected_hdrs["Host"], "baz")
        self.assertEqual(req.unredirected_hdrs["Spam"], "foo")

    # Check iterable body support
    def iterable_body():
        yield b"one"
        yield b"two"
        yield b"three"

    for headers in {}, {"Content-Length": 11}:
        req = Request("http://example.com/", iterable_body(), headers)
        if not headers:
            # Having an iterable body without a Content-Length should
            # raise an exception
            self.assertRaises(ValueError, h.do_request_, req)
        else:
            newreq = h.do_request_(req)

    # A file object.
    # Test only Content-Length attribute of request.

    file_obj = io.BytesIO()
    file_obj.write(b"Something\nSomething\nSomething\n")

    for headers in {}, {"Content-Length": 30}:
        req = Request("http://example.com/", file_obj, headers)
        if not headers:
            # Having a file-object body without a Content-Length should
            # raise an exception
            self.assertRaises(ValueError, h.do_request_, req)
        else:
            newreq = h.do_request_(req)
            self.assertEqual(int(newreq.get_header('Content-length')), 30)

    file_obj.close()

    # array.array Iterable - Content Length is calculated

    iterable_array = array.array("I", [1, 2, 3, 4])

    for headers in {}, {"Content-Length": 16}:
        req = Request("http://example.com/", iterable_array, headers)
        newreq = h.do_request_(req)
        self.assertEqual(int(newreq.get_header('Content-length')), 16)
Senthil Kumaran7bc0d872010-12-19 10:49:52 +0000925
def test_http_doubleslash(self):
    # An unnecessary double slash anywhere in the URL path must not break
    # anything.  Previously, a double slash directly after the host could
    # cause incorrect parsing.
    handler = urllib.request.AbstractHTTPHandler()
    opener = handler.parent = MockOpener()

    body = b""
    for ds_url in (
            "http://example.com/foo/bar/baz.html",
            "http://example.com//foo/bar/baz.html",
            "http://example.com/foo//bar/baz.html",
            "http://example.com/foo/bar//baz.html",
    ):
        request = Request(ds_url, body)

        # Host must be determined correctly when there is no proxy ...
        direct = handler.do_request_(request)
        self.assertEqual(direct.unredirected_hdrs["Host"], "example.com")

        # ... and also once the request is routed through a proxy.
        request.set_proxy("someproxy:3128", None)
        proxied = handler.do_request_(request)
        self.assertEqual(proxied.unredirected_hdrs["Host"], "example.com")
952
def test_fixpath_in_weirdurls(self):
    # Issue4493: urllib2 should supply '/' for URLs whose path does not
    # start with '/'.
    handler = urllib.request.AbstractHTTPHandler()
    opener = handler.parent = MockOpener()

    # (URL, selector expected after do_request_)
    cases = (
        ('http://www.python.org?getspam', '/?getspam'),  # query, no path
        ('http://www.python.org', ''),                   # no path at all
    )
    for url, expected_selector in cases:
        newreq = handler.do_request_(Request(url))
        self.assertEqual(newreq.host, 'www.python.org')
        self.assertEqual(newreq.selector, expected_selector)
971
Facundo Batista72dc1ea2008-08-16 14:44:32 +0000972
def test_errors(self):
    # HTTPErrorProcessor passes 2xx responses through untouched and hands
    # everything else to the parent opener's error().
    processor = urllib.request.HTTPErrorProcessor()
    opener = processor.parent = MockOpener()

    url = "http://example.com/"
    req = Request(url)
    # all 2xx are passed through
    for status, reason in ((200, "OK"),
                           (202, "Accepted"),
                           (206, "Partial content")):
        response = MockResponse(status, reason, {}, "", url)
        result = processor.http_response(req, response)
        self.assertIs(response, result)
        self.assertFalse(hasattr(opener, "proto"))  # o.error not called
    # anything else calls o.error (and MockOpener returns None, here)
    response = MockResponse(502, "Bad gateway", {}, "", url)
    self.assertIsNone(processor.http_response(req, response))
    self.assertEqual(opener.proto, "http")  # o.error called
    self.assertEqual(opener.args, (req, response, 502, "Bad gateway", {}))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +0000997
def test_cookies(self):
    # HTTPCookieProcessor must pass the request to the jar before sending
    # and the response to the jar afterwards, returning both unchanged.
    jar = MockCookieJar()
    processor = urllib.request.HTTPCookieProcessor(jar)
    opener = processor.parent = MockOpener()

    request = Request("http://example.com/")
    response = MockResponse(200, "OK", {}, "")

    # Outgoing side: the jar sees the very same request object.
    sent = processor.http_request(request)
    self.assertIs(jar.ach_req, request)
    self.assertIs(jar.ach_req, sent)
    self.assertEqual(request.origin_req_host, "example.com")
    self.assertFalse(request.unverifiable)

    # Incoming side: the jar sees the request/response pair.
    received = processor.http_response(request, response)
    self.assertIs(jar.ec_req, request)
    self.assertIs(jar.ec_r, response)
    self.assertIs(response, received)
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001014
def test_redirect(self):
    # HTTPRedirectHandler: ordinary 30x handling (method, headers carried
    # over or dropped) followed by detection of redirect loops.
    from_url = "http://example.com/a.html"
    to_url = "http://example.com/b.html"
    handler = urllib.request.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()

    # ordinary redirect behaviour
    for code in 301, 302, 303, 307:
        for data in None, "blah\nblah\n":
            error_method = getattr(handler, "http_error_%s" % code)
            req = Request(from_url, data)
            req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
            req.add_header("Nonsense", "viking=withhold")
            if data is not None:
                req.add_header("Content-Length", str(len(data)))
            req.add_unredirected_header("Spam", "spam")
            try:
                error_method(req, MockFile(), code, "Blah",
                             MockHeaders({"location": to_url}))
            except urllib.error.HTTPError:
                # 307 in response to POST requires user OK
                self.assertTrue(code == 307 and data is not None)
            redirected = opener.req
            self.assertEqual(redirected.get_full_url(), to_url)
            try:
                self.assertEqual(redirected.get_method(), "GET")
            except AttributeError:
                self.assertFalse(redirected.data)

            # now it's a GET, there should not be headers regarding content
            # (possibly dragged from before being a POST)
            lowered = [name.lower() for name in redirected.headers]
            self.assertNotIn("content-length", lowered)
            self.assertNotIn("content-type", lowered)

            self.assertEqual(redirected.headers["Nonsense"],
                             "viking=withhold")
            self.assertNotIn("Spam", redirected.headers)
            self.assertNotIn("Spam", redirected.unredirected_hdrs)

    # loop detection
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    def redirect(hdlr, request, url=to_url):
        hdlr.http_error_302(request, MockFile(), 302, "Blah",
                            MockHeaders({"location": url}))
    # Note that the *original* request shares the same record of
    # redirections with the sub-requests caused by the redirections.

    # detect infinite loop redirect of a URL to itself
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/")
            count += 1
    except urllib.error.HTTPError:
        # don't stop until max_repeats, because cookies may introduce state
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_repeats)

    # detect endless non-repeating chain of redirects
    req = Request(from_url, origin_req_host="example.com")
    count = 0
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT
    try:
        while True:
            redirect(handler, req, "http://example.com/%d" % count)
            count += 1
    except urllib.error.HTTPError:
        self.assertEqual(count,
                         urllib.request.HTTPRedirectHandler.max_redirections)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001086
guido@google.coma119df92011-03-29 11:41:02 -07001087
def test_invalid_redirect(self):
    # Redirects may only target http, https or ftp URLs; any other scheme
    # must be refused with HTTPError.
    from_url = "http://example.com/a.html"
    schemeless_url = "example.com/b.html"
    handler = urllib.request.HTTPRedirectHandler()
    opener = handler.parent = MockOpener()
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in ('file', 'imap', 'ldap'):
        invalid_url = "%s://%s" % (scheme, schemeless_url)
        self.assertRaises(urllib.error.HTTPError, handler.http_error_302,
                          req, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": invalid_url}))

    for scheme in ('http', 'https', 'ftp'):
        valid_url = "%s://%s" % (scheme, schemeless_url)
        handler.http_error_302(req, MockFile(), 302, "That's fine",
                               MockHeaders({"location": valid_url}))
        self.assertEqual(opener.req.get_full_url(), valid_url)
1109
def test_relative_redirect(self):
    # A relative Location header must be resolved against the URL of the
    # request being redirected.
    from_url = "http://example.com/a.html"
    relative_url = "/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    # Send the *relative* URL in the header and compare the follow-up
    # request against the joined URL; sending an already-joined absolute
    # URL would never exercise the relative-resolution path.
    expected_url = urllib.parse.urljoin(from_url, relative_url)
    h.http_error_302(req, MockFile(), 302, "That's fine",
                     MockHeaders({"location": relative_url}))
    self.assertEqual(o.req.get_full_url(), expected_url)
1122
def test_cookie_redirect(self):
    # cookies shouldn't leak into redirected requests
    from http.cookiejar import CookieJar
    from test.test_http_cookiejar import interact_netscape

    jar = CookieJar()
    # Seed the jar with a cookie for the host that will be redirected from.
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")
    http_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        http_handler,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")
    self.assertFalse(http_handler.req.has_header("Cookie"))
Martin v. Löwis2a6ba902004-05-31 18:22:40 +00001137
def test_redirect_fragment(self):
    # The fragment of a redirect target must survive in the final URL.
    redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
    http_handler = MockHTTPHandler(302, 'Location: ' + redirected_url)
    opener = build_test_opener(
        http_handler,
        urllib.request.HTTPDefaultErrorHandler(),
        urllib.request.HTTPRedirectHandler(),
    )
    response = opener.open('http://www.example.com')
    self.assertEqual(response.geturl(), redirected_url.strip())
1146
def test_proxy(self):
    # ProxyHandler rewrites the request host to the proxy and then lets
    # the ordinary http_open handler run.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(http="proxy.example.com:3128"))
    opener.add_handler(proxy_handler)
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    req = Request("http://acme.example.com/")
    self.assertEqual(req.host, "acme.example.com")
    opener.open(req)
    self.assertEqual(req.host, "proxy.example.com:3128")

    recorded = [call[:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "http_open")], recorded)
1163
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001164 def test_proxy_no_proxy(self):
1165 os.environ['no_proxy'] = 'python.org'
1166 o = OpenerDirector()
1167 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1168 o.add_handler(ph)
1169 req = Request("http://www.perl.org/")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001170 self.assertEqual(req.host, "www.perl.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001171 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001172 self.assertEqual(req.host, "proxy.example.com")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001173 req = Request("http://www.python.org")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001174 self.assertEqual(req.host, "www.python.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001175 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001176 self.assertEqual(req.host, "www.python.org")
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001177 del os.environ['no_proxy']
1178
Ronald Oussorene72e1612011-03-14 18:15:25 -04001179 def test_proxy_no_proxy_all(self):
1180 os.environ['no_proxy'] = '*'
1181 o = OpenerDirector()
1182 ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
1183 o.add_handler(ph)
1184 req = Request("http://www.python.org")
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001185 self.assertEqual(req.host, "www.python.org")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001186 r = o.open(req)
Senthil Kumaran77ebfcc2012-08-20 13:43:59 -07001187 self.assertEqual(req.host, "www.python.org")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001188 del os.environ['no_proxy']
1189
Senthil Kumaran7bb04972009-10-11 04:58:55 +00001190
def test_proxy_https(self):
    # Same as the plain-http proxy test, for an https proxy mapping.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(https="proxy.example.com:3128"))
    opener.add_handler(proxy_handler)
    handlers = add_ordered_mock_handlers(
        opener, [[("https_open", "return response")]])

    req = Request("https://www.example.com/")
    self.assertEqual(req.host, "www.example.com")
    opener.open(req)
    self.assertEqual(req.host, "proxy.example.com:3128")

    recorded = [call[:2] for call in opener.calls]
    self.assertEqual([(handlers[0], "https_open")], recorded)
1206
def test_proxy_https_proxy_authorization(self):
    # When tunnelling https through a proxy, Proxy-Authorization must stay
    # on the request and must not be sent as a header on the tunnelled
    # connection.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(https='proxy.example.com:3128'))
    opener.add_handler(proxy_handler)
    https_handler = MockHTTPSHandler()
    opener.add_handler(https_handler)

    req = Request("https://www.example.com/")
    req.add_header("Proxy-Authorization", "FooBar")
    req.add_header("User-Agent", "Grail")
    self.assertEqual(req.host, "www.example.com")
    self.assertIsNone(req._tunnel_host)
    opener.open(req)

    # Verify Proxy-Authorization gets tunneled to request.
    # httpsconn req_headers do not have the Proxy-Authorization header but
    # the req will have.
    sent_headers = https_handler.httpconn.req_headers
    self.assertNotIn(("Proxy-Authorization", "FooBar"), sent_headers)
    self.assertIn(("User-Agent", "Grail"), sent_headers)
    self.assertIsNotNone(req._tunnel_host)
    self.assertEqual(req.host, "proxy.example.com:3128")
    self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
Senthil Kumaran97f0c6b2009-07-25 04:24:38 +00001229
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001230 # TODO: This should be only for OSX
1231 @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
Ronald Oussorene72e1612011-03-14 18:15:25 -04001232 def test_osx_proxy_bypass(self):
1233 bypass = {
1234 'exclude_simple': False,
1235 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10',
1236 '10.0/16']
1237 }
1238 # Check hosts that should trigger the proxy bypass
1239 for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1',
1240 '10.0.0.1'):
1241 self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass),
1242 'expected bypass of %s to be True' % host)
1243 # Check hosts that should not trigger the proxy bypass
1244 for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', 'test'):
1245 self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass),
1246 'expected bypass of %s to be False' % host)
1247
1248 # Check the exclude_simple flag
1249 bypass = {'exclude_simple': True, 'exceptions': []}
1250 self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
1251
def test_basic_auth(self, quote_char='"'):
    # Basic authentication against a 401 challenge whose realm is wrapped
    # in quote_char (parameterised so other tests can reuse this body).
    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    realm = "ACME Widget Store"
    challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                 (quote_char, realm, quote_char))
    http_handler = MockHTTPHandler(401, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)
    protected_url = "http://acme.example.com/protected"
    self._test_basic_auth(opener, auth_handler, "Authorization",
                          realm, http_handler, password_manager,
                          protected_url,
                          protected_url,
                          )
1267
def test_basic_auth_with_single_quoted_realm(self):
    # Re-run the basic-auth scenario with the realm in single quotes.
    single_quote = "'"
    self.test_basic_auth(quote_char=single_quote)
1270
def test_basic_auth_with_unquoted_realm(self):
    # An unquoted realm in the challenge is still handled, but the
    # exchange is expected to emit a UserWarning.
    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    realm = "ACME Widget Store"
    challenge = 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm
    http_handler = MockHTTPHandler(401, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)
    protected_url = "http://acme.example.com/protected"
    with self.assertWarns(UserWarning):
        self._test_basic_auth(opener, auth_handler, "Authorization",
                              realm, http_handler, password_manager,
                              protected_url,
                              protected_url,
                              )
Senthil Kumaran34f3fcc2012-05-15 22:30:25 +08001286
def test_proxy_basic_auth(self):
    # Basic authentication against a proxy: a 407 challenge answered with
    # a Proxy-authorization header.
    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        dict(http="proxy.example.com:3128"))
    opener.add_handler(proxy_handler)
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(407, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)
    self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                          realm, http_handler, password_manager,
                          "http://acme.example.com:3128/protected",
                          "proxy.example.com:3128",
                          )
1303
1304 def test_basic_and_digest_auth_handlers(self):
1305 # HTTPDigestAuthHandler threw an exception if it couldn't handle a 40*
1306 # response (http://python.org/sf/1479302), where it should instead
1307 # return None to allow another handler (especially
1308 # HTTPBasicAuthHandler) to handle the response.
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001309
1310 # Also (http://python.org/sf/14797027, RFC 2617 section 1.2), we must
1311 # try digest first (since it's the strongest auth scheme), so we record
1312 # order of calls here to check digest comes first:
1313 class RecordingOpenerDirector(OpenerDirector):
1314 def __init__(self):
1315 OpenerDirector.__init__(self)
1316 self.recorded = []
1317 def record(self, info):
1318 self.recorded.append(info)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001319 class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001320 def http_error_401(self, *args, **kwds):
1321 self.parent.record("digest")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001322 urllib.request.HTTPDigestAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001323 *args, **kwds)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001324 class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001325 def http_error_401(self, *args, **kwds):
1326 self.parent.record("basic")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001327 urllib.request.HTTPBasicAuthHandler.http_error_401(self,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001328 *args, **kwds)
1329
1330 opener = RecordingOpenerDirector()
Thomas Wouters477c8d52006-05-27 19:21:47 +00001331 password_manager = MockPasswordManager()
1332 digest_handler = TestDigestAuthHandler(password_manager)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001333 basic_handler = TestBasicAuthHandler(password_manager)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001334 realm = "ACME Networks"
1335 http_handler = MockHTTPHandler(
1336 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001337 opener.add_handler(basic_handler)
1338 opener.add_handler(digest_handler)
1339 opener.add_handler(http_handler)
1340
1341 # check basic auth isn't blocked by digest handler failing
Thomas Wouters477c8d52006-05-27 19:21:47 +00001342 self._test_basic_auth(opener, basic_handler, "Authorization",
1343 realm, http_handler, password_manager,
1344 "http://acme.example.com/protected",
1345 "http://acme.example.com/protected",
1346 )
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001347 # check digest was tried before basic (twice, because
1348 # _test_basic_auth called .open() twice)
1349 self.assertEqual(opener.recorded, ["digest", "basic"]*2)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001350
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001351 def test_unsupported_auth_digest_handler(self):
1352 opener = OpenerDirector()
1353 # While using DigestAuthHandler
1354 digest_auth_handler = urllib.request.HTTPDigestAuthHandler(None)
1355 http_handler = MockHTTPHandler(
1356 401, 'WWW-Authenticate: Kerberos\r\n\r\n')
1357 opener.add_handler(digest_auth_handler)
1358 opener.add_handler(http_handler)
1359 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1360
1361 def test_unsupported_auth_basic_handler(self):
1362 # While using BasicAuthHandler
1363 opener = OpenerDirector()
1364 basic_auth_handler = urllib.request.HTTPBasicAuthHandler(None)
1365 http_handler = MockHTTPHandler(
1366 401, 'WWW-Authenticate: NTLM\r\n\r\n')
1367 opener.add_handler(basic_auth_handler)
1368 opener.add_handler(http_handler)
1369 self.assertRaises(ValueError,opener.open,"http://www.example.com")
1370
Thomas Wouters477c8d52006-05-27 19:21:47 +00001371 def _test_basic_auth(self, opener, auth_handler, auth_header,
1372 realm, http_handler, password_manager,
1373 request_url, protected_url):
Christian Heimes05e8be12008-02-23 18:30:17 +00001374 import base64
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375 user, password = "wile", "coyote"
Thomas Wouters477c8d52006-05-27 19:21:47 +00001376
1377 # .add_password() fed through to password manager
1378 auth_handler.add_password(realm, request_url, user, password)
1379 self.assertEqual(realm, password_manager.realm)
1380 self.assertEqual(request_url, password_manager.url)
1381 self.assertEqual(user, password_manager.user)
1382 self.assertEqual(password, password_manager.password)
1383
1384 r = opener.open(request_url)
1385
1386 # should have asked the password manager for the username/password
1387 self.assertEqual(password_manager.target_realm, realm)
1388 self.assertEqual(password_manager.target_url, protected_url)
1389
1390 # expect one request without authorization, then one with
1391 self.assertEqual(len(http_handler.requests), 2)
1392 self.assertFalse(http_handler.requests[0].has_header(auth_header))
Guido van Rossum98b349f2007-08-27 21:47:52 +00001393 userpass = bytes('%s:%s' % (user, password), "ascii")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001394 auth_hdr_value = ('Basic ' +
Georg Brandl706824f2009-06-04 09:42:55 +00001395 base64.encodebytes(userpass).strip().decode())
Thomas Wouters477c8d52006-05-27 19:21:47 +00001396 self.assertEqual(http_handler.requests[1].get_header(auth_header),
1397 auth_hdr_value)
Senthil Kumaranca2fc9e2010-02-24 16:53:16 +00001398 self.assertEqual(http_handler.requests[1].unredirected_hdrs[auth_header],
1399 auth_hdr_value)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001400 # if the password manager can't find a password, the handler won't
1401 # handle the HTTP auth error
1402 password_manager.user = password_manager.password = None
1403 http_handler.reset()
1404 r = opener.open(request_url)
1405 self.assertEqual(len(http_handler.requests), 1)
1406 self.assertFalse(http_handler.requests[0].has_header(auth_header))
1407
Senthil Kumaran4de00a22011-05-11 21:17:57 +08001408
class MiscTests(unittest.TestCase):

    def test_build_opener(self):
        # build_opener() must install every handler it is given, whether
        # passed as a class or as an instance.
        class MyHTTPHandler(urllib.request.HTTPHandler):
            pass

        class FooHandler(urllib.request.BaseHandler):
            def foo_open(self):
                pass

        class BarHandler(urllib.request.BaseHandler):
            def bar_open(self):
                pass

        build_opener = urllib.request.build_opener

        o = build_opener(FooHandler, BarHandler)
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # can take a mix of classes and instances
        o = build_opener(FooHandler, BarHandler())
        self.opener_has_handler(o, FooHandler)
        self.opener_has_handler(o, BarHandler)

        # subclasses of default handlers override default handlers
        o = build_opener(MyHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)

        # a particular case of overriding: default handlers can be passed
        # in explicitly
        o = build_opener()
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler)
        self.opener_has_handler(o, urllib.request.HTTPHandler)
        o = build_opener(urllib.request.HTTPHandler())
        self.opener_has_handler(o, urllib.request.HTTPHandler)

        # Issue2670: multiple handlers sharing the same base class
        class MyOtherHTTPHandler(urllib.request.HTTPHandler):
            pass

        o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
        self.opener_has_handler(o, MyHTTPHandler)
        self.opener_has_handler(o, MyOtherHTTPHandler)

    def test_issue16464(self):
        # Re-sending a Request with different data must update both
        # request.data and the Content-length header.
        #
        # This test talks to a real server, so it only runs when the
        # 'network' resource is enabled.
        if not support.is_resource_enabled('network'):
            self.skipTest('test requires network access')

        opener = urllib.request.build_opener()
        request = urllib.request.Request("http://www.python.org/~jeremy/")
        self.assertIsNone(request.data)

        self._open_and_release(opener, request, "1".encode("us-ascii"))
        self.assertEqual(b"1", request.data)
        self.assertEqual("1", request.get_header("Content-length"))

        self._open_and_release(opener, request,
                               "1234567890".encode("us-ascii"))
        self.assertEqual(b"1234567890", request.data)
        self.assertEqual("10", request.get_header("Content-length"))

    def _open_and_release(self, opener, request, data):
        # Send *request* with *data* through *opener* and close whatever
        # comes back, so no connection is left open.
        try:
            response = opener.open(request, data)
        except urllib.error.HTTPError as err:
            # The server replied with an error status; the reply still has
            # to be closed.
            err.close()
        except OSError as err:
            # No usable network: skip instead of reporting an error.
            self.skipTest("network unavailable: %s" % err)
        else:
            response.close()

    def opener_has_handler(self, opener, handler_class):
        # Assert that *opener* holds a handler whose class is exactly
        # *handler_class* (subclasses do not count).
        self.assertTrue(any(h.__class__ == handler_class
                            for h in opener.handlers))
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001465
class RequestTests(unittest.TestCase):

    def setUp(self):
        # One request without data and one with data plus a custom header,
        # both for the same URL.
        url = "http://www.python.org/~jeremy/"
        self.get = Request(url)
        self.post = Request(url, "data", headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_data(self):
        # Assigning data turns the request from GET into POST.
        request = self.get
        self.assertFalse(request.data)
        self.assertEqual("GET", request.get_method())
        request.data = "spam"
        self.assertTrue(request.data)
        self.assertEqual("POST", request.get_method())

    # issue 16464
    # if we change data we need to remove content-length header
    # (cause it's most probably calculated for previous value)
    def test_setting_data_should_remove_content_length(self):
        header = "Content-length"
        self.assertNotIn(header, self.get.unredirected_hdrs)
        self.get.add_unredirected_header(header, 42)
        self.assertEqual(42, self.get.unredirected_hdrs[header])
        self.get.data = "spam"
        self.assertNotIn(header, self.get.unredirected_hdrs)

    def test_get_full_url(self):
        full_url = self.get.get_full_url()
        self.assertEqual("http://www.python.org/~jeremy/", full_url)

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.selector)
        root_request = Request("http://www.python.org/")
        self.assertEqual("/", root_request.selector)

    def test_get_type(self):
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        # %70 is a percent-encoded "p".
        quoted = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", quoted.host)

    def test_proxy(self):
        request = self.get
        self.assertFalse(request.has_proxy())
        request.set_proxy("www.perl.org", "http")
        self.assertTrue(request.has_proxy())
        self.assertEqual("www.python.org", request.origin_req_host)
        self.assertEqual("www.perl.org", request.host)

    def test_wrapped_url(self):
        wrapped = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", wrapped.host)

    def test_url_fragment(self):
        # The fragment is not part of the selector.
        with_query = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", with_query.selector)
        fragment_only = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", fragment_only.selector)

        # Issue 11703: geturl() omits fragment in the original URL.
        url = 'http://docs.python.org/library/urllib2.html#OK'
        self.assertEqual(Request(url).get_full_url(), url)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001535
# Doctest-only function: it has no body besides its docstring, whose
# examples run when test_main() passes this module to support.run_doctest().
def test_HTTPError_interface():
    """
    Issue 13211 reveals that HTTPError didn't implement the URLError
    interface even though HTTPError is a subclass of URLError.

    >>> msg = 'something bad happened'
    >>> url = code = fp = None
    >>> hdrs = 'Content-Length: 42'
    >>> err = urllib.error.HTTPError(url, code, msg, hdrs, fp)
    >>> assert hasattr(err, 'reason')
    >>> err.reason
    'something bad happened'
    >>> assert hasattr(err, 'headers')
    >>> err.headers
    'Content-Length: 42'
    """
1552
def test_main(verbose=None):
    """Run the doctests of this module and of urllib.request, then the
    unittest classes defined in this file."""
    from test import test_urllib2
    for module in (test_urllib2, urllib.request):
        support.run_doctest(module, verbose)
    support.run_unittest(TrivialTests,
                         OpenerDirectorTests,
                         HandlerTests,
                         MiscTests,
                         RequestTests)
Jeremy Hyltonc1be59f2003-12-14 05:27:34 +00001563
# When the file is executed as a script, run everything via test_main()
# with verbose=True.
if __name__ == "__main__":
    test_main(verbose=True)