blob: 5fcb4cbca20f94a40a4b04f4a288779934e1d496 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00002
3import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00004from test import support
Thomas Wouters477c8d52006-05-27 19:21:47 +00005from test.test_urllib2 import sanepathname2url
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00006
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00007import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00009import urllib.error
10import urllib.request
Senthil Kumaranb8f7ea62010-04-20 10:35:49 +000011import sys
Antoine Pitroud5323212010-10-22 18:19:07 +000012try:
13 import ssl
14except ImportError:
15 ssl = None
Senthil Kumaranb8f7ea62010-04-20 10:35:49 +000016
# Timeout, in seconds, passed to urlopen() by OtherNetworkTests._test_urls.
TIMEOUT = 60
Jeremy Hylton5d9c3032004-08-07 17:40:50 +000018
Christian Heimes969fe572008-01-25 11:23:10 +000019
Georg Brandlc28e1fa2008-06-10 19:20:26 +000020def _retry_thrice(func, exc, *args, **kwargs):
Christian Heimes969fe572008-01-25 11:23:10 +000021 for i in range(3):
22 try:
Georg Brandlc28e1fa2008-06-10 19:20:26 +000023 return func(*args, **kwargs)
24 except exc as e:
Neal Norwitz2f142582008-01-26 19:49:41 +000025 last_exc = e
Christian Heimes969fe572008-01-25 11:23:10 +000026 continue
27 except:
28 raise
29 raise last_exc
30
def _wrap_with_retry_thrice(func, exc):
    """Return a callable that runs *func* through ``_retry_thrice``.

    The returned wrapper forwards its positional and keyword arguments
    unchanged and uses *exc* as the exception type that triggers a retry.
    """
    def wrapped(*call_args, **call_kwargs):
        return _retry_thrice(func, exc, *call_args, **call_kwargs)

    return wrapped
35
# Remote hosts are flaky.  Route urlopen() through the retry wrapper so that
# a URLError leads to another connection attempt instead of an immediate
# failure.
_urlopen_with_retry = _wrap_with_retry_thrice(
    urllib.request.urlopen, urllib.error.URLError)
Christian Heimes969fe572008-01-25 11:23:10 +000040
Thomas Wouters477c8d52006-05-27 19:21:47 +000041
class AuthTests(unittest.TestCase):
    """Tests urllib2 authentication features."""

    # This class currently defines no active tests: there is no page under
    # python.org which could be used to test HTTP authentication.  The
    # disabled test is kept below for reference.  It still uses Python 2
    # names and syntax (``urllib2``, ``except X, exc``) and must be ported
    # before it can be re-enabled.
    #
    # def test_basic_auth(self):
    #     import http.client
    #
    #     test_url = "http://www.python.org/test/test_urllib2/basic_auth"
    #     test_hostport = "www.python.org"
    #     test_realm = 'Test Realm'
    #     test_user = 'test.test_urllib2net'
    #     test_password = 'blah'
    #
    #     # failure
    #     try:
    #         _urlopen_with_retry(test_url)
    #     except urllib2.HTTPError, exc:
    #         self.assertEqual(exc.code, 401)
    #     else:
    #         self.fail("urlopen() should have failed with 401")
    #
    #     # success
    #     auth_handler = urllib2.HTTPBasicAuthHandler()
    #     auth_handler.add_password(test_realm, test_hostport,
    #                               test_user, test_password)
    #     opener = urllib2.build_opener(auth_handler)
    #     f = opener.open('http://localhost/')
    #     response = _urlopen_with_retry("http://www.python.org/")
    #
    #     # The 'userinfo' URL component is deprecated by RFC 3986 for security
    #     # reasons, let's not implement it!  (it's already implemented for proxy
    #     # specification strings (that is, URLs or authorities specifying a
    #     # proxy), so we must keep that)
    #     self.assertRaises(http.client.InvalidURL,
    #                       urllib2.urlopen, "http://evil:thing@example.com")
79
80
class CloseSocketTest(unittest.TestCase):
    """Check that closing a urlopen() response closes its socket."""

    def test_close(self):
        # Calling .close() on urllib's response objects should close the
        # underlying socket.
        url = "http://www.python.org/"
        # Guard the network access with transient_internet(), like the
        # other network tests in this file.
        with support.transient_internet(url):
            response = _urlopen_with_retry(url)
            # Safety net: make sure the response is closed even when the
            # first assertion below fails before the explicit close().
            self.addCleanup(response.close)
            sock = response.fp
            self.assertFalse(sock.closed)
            response.close()
            self.assertTrue(sock.closed)
Thomas Woutersb2137042007-02-01 18:02:27 +000092
class OtherNetworkTests(unittest.TestCase):
    """Smoke tests that open real ftp:, file: and http: URLs."""

    def setUp(self):
        if 0:  # for debugging
            import logging
            # NOTE(review): this logger is named "test_urllib2net" while
            # _test_urls() logs through "test_urllib2" -- confirm which
            # name is intended before relying on this debugging aid.
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.kernel.org/pub/linux/kernel/README',
            'ftp://ftp.kernel.org/pub/linux/kernel/non-existent-file',
            #'ftp://ftp.kernel.org/pub/leenox/kernel/test',
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
            '/research-reports/00README-Legal-Rules-Regs',
        ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = support.TESTFN
        f = open(TESTFN, 'w')
        try:
            # "with" closes the file even if the write fails, so the
            # os.remove() in the finally clause never sees an open handle.
            with f:
                f.write('hi there\n')
            urls = [
                'file:' + sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None,
                 urllib.error.URLError),
            ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

        self.assertRaises(ValueError, urllib.request.urlopen,
                          './relative_path/to/file')

    # XXX Following test depends on machine configurations that are internal
    # to CNRI.  Need to set up a public server with the right authentication
    # configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    def test_urlwithfrag(self):
        # The fragment must survive in the URL reported by the response.
        urlwith_frag = "http://docs.python.org/glossary.html#glossary"
        with support.transient_internet(urlwith_frag):
            req = urllib.request.Request(urlwith_frag)
            with urllib.request.urlopen(req) as res:
                self.assertEqual(
                    res.geturl(),
                    "http://docs.python.org/glossary.html#glossary")

    def test_custom_headers(self):
        url = "http://www.example.com"
        with support.transient_internet(url):
            opener = urllib.request.build_opener()
            request = urllib.request.Request(url)
            self.assertFalse(request.header_items())
            # Only the side effect on the request headers matters here, so
            # each response is closed as soon as it is returned.
            opener.open(request).close()
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent', 'Test-Agent')
            opener.open(request).close()
            self.assertEqual(request.get_header('User-agent'), 'Test-Agent')

    def test_sites_no_connection_close(self):
        # Some sites do not send Connection: close header.
        # Verify that those work properly. (#issue12576)

        URL = 'http://www.imdb.com'  # mangles Connection:close

        with support.transient_internet(URL):
            try:
                with urllib.request.urlopen(URL) as res:
                    pass
            except ValueError:
                self.fail("urlopen failed for site not sending "
                          "Connection:close")
            else:
                self.assertTrue(res)

            with urllib.request.urlopen(URL) as req:
                res = req.read()
            self.assertTrue(res)

    def _test_urls(self, urls, handlers, retry=True):
        # Open every entry of *urls* through an opener built from
        # *handlers* and read the whole response.
        #
        # An entry is either a URL string or a (url, data, expected_error)
        # tuple.  When expected_error is set, an error raised while opening
        # must be an instance of it; otherwise errors raised while opening
        # are only logged at debug level.  With *retry* true, the open call
        # is retried on URLError.
        import time
        import logging
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib.request.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)

        for url in urls:
            if isinstance(url, tuple):
                url, req, expected_err = url
            else:
                req = expected_err = None

            with support.transient_internet(url):
                debug(url)
                try:
                    f = urlopen(url, req, TIMEOUT)
                except EnvironmentError as err:
                    # URLError is a subclass of EnvironmentError, so this
                    # clause also handles URLError (timeouts included); a
                    # separate "except URLError" after it could never run.
                    debug(err)
                    if expected_err:
                        msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                               (expected_err, url, req, type(err), err))
                        self.assertIsInstance(err, expected_err, msg)
                else:
                    try:
                        with support.time_out, \
                             support.socket_peer_reset, \
                             support.ioerror_peer_reset:
                            buf = f.read()
                            debug("read %d bytes" % len(buf))
                    except socket.timeout:
                        print("<timeout: %s>" % url, file=sys.stderr)
                    finally:
                        # Close the response however the read ended.
                        f.close()
            debug("******** next url coming up...")
            time.sleep(0.1)

    def _extra_handlers(self):
        # Return the extra opener handlers used by the URL tests: a
        # CacheFTPHandler with a 1 second cache timeout whose cache is
        # cleared again when the test finishes.
        handlers = []

        cfh = urllib.request.CacheFTPHandler()
        self.addCleanup(cfh.clear_cache)
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers
252
Christian Heimesbbe741d2008-03-28 10:53:29 +0000253
class TimeoutTest(unittest.TestCase):
    """Check which timeout ends up on the socket behind a urlopen() response.

    Each test opens a URL and reads the timeout from the response's
    underlying socket, reached through private attributes of the response.
    """

    def test_http_basic(self):
        # No default timeout and no explicit one: the socket has none.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url, timeout=None):
            u = _urlopen_with_retry(url)
            self.addCleanup(u.close)
            self.assertIsNone(u.fp.raw._sock.gettimeout())

    def test_http_default_timeout(self):
        # The global default timeout is picked up by the connection.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(url)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp.raw._sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        # An explicit timeout=None overrides the global default.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(url, timeout=None)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertIsNone(u.fp.raw._sock.gettimeout())

    def test_http_timeout(self):
        # An explicit timeout is applied to the socket.
        url = "http://www.python.org"
        with support.transient_internet(url):
            u = _urlopen_with_retry(url, timeout=120)
            self.addCleanup(u.close)
            self.assertEqual(u.fp.raw._sock.gettimeout(), 120)

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    def test_ftp_basic(self):
        # No default timeout and no explicit one: the socket has none.
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST, timeout=None):
            u = _urlopen_with_retry(self.FTP_HOST)
            self.addCleanup(u.close)
            self.assertIsNone(u.fp.fp.raw._sock.gettimeout())

    def test_ftp_default_timeout(self):
        # The global default timeout is picked up by the connection.
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(self.FTP_HOST)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp.fp.raw._sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        # An explicit timeout=None overrides the global default.
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(self.FTP_HOST, timeout=None)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertIsNone(u.fp.fp.raw._sock.gettimeout())

    def test_ftp_timeout(self):
        # An explicit timeout is applied to the socket.
        with support.transient_internet(self.FTP_HOST):
            u = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.addCleanup(u.close)
            self.assertEqual(u.fp.fp.raw._sock.gettimeout(), 60)
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000330
Thomas Wouters477c8d52006-05-27 19:21:47 +0000331
@unittest.skipUnless(ssl, "requires SSL support")
class HTTPSTests(unittest.TestCase):
    """HTTPS-specific network tests; skipped when ``ssl`` is unavailable."""

    def test_sni(self):
        self.skipTest("test disabled - test server needed")
        # Everything below is unreachable until a test server exists and
        # the "XXX" placeholders are replaced with its URL.
        #
        # Checks that Server Name Indication works, if supported by the
        # OpenSSL linked to.
        # The ssl module itself doesn't have server-side support for SNI,
        # so we rely on a third-party test site.
        expect_sni = ssl.HAS_SNI
        with support.transient_internet("XXX"):
            # Close the response once its body has been read.  read() is
            # used instead of readall(), which only raw streams provide.
            # NOTE(review): confirm against the HTTPResponse base class of
            # the Python version this test targets.
            with urllib.request.urlopen("XXX") as u:
                contents = u.read()
            if expect_sni:
                self.assertIn(b"Great", contents)
                self.assertNotIn(b"Unfortunately", contents)
            else:
                self.assertNotIn(b"Great", contents)
                self.assertIn(b"Unfortunately", contents)
351
352
def test_main():
    """Run all test classes of this module after requiring "network"."""
    support.requires("network")
    test_classes = (
        AuthTests,
        HTTPSTests,
        OtherNetworkTests,
        CloseSocketTest,
        TimeoutTest,
    )
    support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()