blob: 4981c2410bf87bf9e69ca6c5b0f338a12693b636 [file] [log] [blame]
import functools
import os
import socket
import sys
import unittest
import urllib.error
import urllib.request

from test import support
from test.test_urllib2 import sanepathname2url

try:
    import ssl
except ImportError:
    ssl = None
Senthil Kumaranb8f7ea62010-04-20 10:35:49 +000014
# Timeout, in seconds, handed to the opener for every URL that
# OtherNetworkTests._test_urls() fetches.
TIMEOUT = 60
Jeremy Hylton5d9c3032004-08-07 17:40:50 +000016
Christian Heimes969fe572008-01-25 11:23:10 +000017
Georg Brandlc28e1fa2008-06-10 19:20:26 +000018def _retry_thrice(func, exc, *args, **kwargs):
Christian Heimes969fe572008-01-25 11:23:10 +000019 for i in range(3):
20 try:
Georg Brandlc28e1fa2008-06-10 19:20:26 +000021 return func(*args, **kwargs)
22 except exc as e:
Neal Norwitz2f142582008-01-26 19:49:41 +000023 last_exc = e
Christian Heimes969fe572008-01-25 11:23:10 +000024 continue
25 except:
26 raise
27 raise last_exc
28
Georg Brandlc28e1fa2008-06-10 19:20:26 +000029def _wrap_with_retry_thrice(func, exc):
30 def wrapped(*args, **kwargs):
31 return _retry_thrice(func, exc, *args, **kwargs)
32 return wrapped
33
# Remote hosts are flaky, so the tests go through a urlopen() that is
# retried (three attempts in total) whenever it raises URLError.
_urlopen_with_retry = _wrap_with_retry_thrice(
    urllib.request.urlopen,
    urllib.error.URLError,
)
Christian Heimes969fe572008-01-25 11:23:10 +000038
Thomas Wouters477c8d52006-05-27 19:21:47 +000039
class AuthTests(unittest.TestCase):
    """Tests urllib2 authentication features."""

    # The only test in this class is disabled: there is no page under
    # python.org that could be used to test HTTP authentication.
    # NOTE: the disabled code still uses the Python 2 ``urllib2`` names and
    # ``except X, exc`` syntax, so it must be ported before it is re-enabled.
    #
    # def test_basic_auth(self):
    #     import http.client
    #
    #     test_url = "http://www.python.org/test/test_urllib2/basic_auth"
    #     test_hostport = "www.python.org"
    #     test_realm = 'Test Realm'
    #     test_user = 'test.test_urllib2net'
    #     test_password = 'blah'
    #
    #     # failure
    #     try:
    #         _urlopen_with_retry(test_url)
    #     except urllib2.HTTPError, exc:
    #         self.assertEqual(exc.code, 401)
    #     else:
    #         self.fail("urlopen() should have failed with 401")
    #
    #     # success
    #     auth_handler = urllib2.HTTPBasicAuthHandler()
    #     auth_handler.add_password(test_realm, test_hostport,
    #                               test_user, test_password)
    #     opener = urllib2.build_opener(auth_handler)
    #     f = opener.open('http://localhost/')
    #     response = _urlopen_with_retry("http://www.python.org/")
    #
    #     # The 'userinfo' URL component is deprecated by RFC 3986 for security
    #     # reasons, let's not implement it!  (it's already implemented for proxy
    #     # specification strings (that is, URLs or authorities specifying a
    #     # proxy), so we must keep that)
    #     self.assertRaises(http.client.InvalidURL,
    #                       urllib2.urlopen, "http://evil:thing@example.com")
77
78
class CloseSocketTest(unittest.TestCase):
    """Check that closing a urlopen() response closes its socket file."""

    def test_close(self):
        # Calling .close() on the response object should close the
        # underlying socket.
        url = "http://www.python.org/"
        with support.transient_internet(url):
            response = _urlopen_with_retry(url)
            # Release the connection even if an assertion below fails
            # before the explicit close(); closing twice is harmless.
            self.addCleanup(response.close)
            sock = response.fp
            self.assertFalse(sock.closed)
            response.close()
            self.assertTrue(sock.closed)
Thomas Woutersb2137042007-02-01 18:02:27 +000091
class OtherNetworkTests(unittest.TestCase):
    """Smoke tests that open ftp:, file: and http: URLs.

    Most tests funnel through ``_test_urls``, which only checks that each
    URL can be opened and read (or fails with the declared error).
    """

    def setUp(self):
        if 0:  # for debugging
            import logging
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.kernel.org/pub/linux/kernel/README',
            'ftp://ftp.kernel.org/pub/linux/kernel/non-existent-file',
            #'ftp://ftp.kernel.org/pub/leenox/kernel/test',
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
                '/research-reports/00README-Legal-Rules-Regs',
            ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = support.TESTFN
        f = open(TESTFN, 'w')
        try:
            # ``with`` closes the handle even if the write fails.
            with f:
                f.write('hi there\n')
            urls = [
                'file:' + sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None,
                 urllib.error.URLError),
                ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

        self.assertRaises(ValueError, urllib.request.urlopen,
                          './relative_path/to/file')

    # XXX Following test depends on machine configurations that are internal
    # to CNRI.  Need to set up a public server with the right authentication
    # configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    def test_urlwithfrag(self):
        # The fragment must survive in the URL reported by the response.
        urlwith_frag = "http://docs.python.org/2/glossary.html#glossary"
        with support.transient_internet(urlwith_frag):
            req = urllib.request.Request(urlwith_frag)
            with urllib.request.urlopen(req) as res:
                self.assertEqual(res.geturl(),
                        "http://docs.python.org/2/glossary.html#glossary")

    def test_custom_headers(self):
        url = "http://www.example.com"
        with support.transient_internet(url):
            opener = urllib.request.build_opener()
            request = urllib.request.Request(url)
            self.assertFalse(request.header_items())
            # Opening the request is what fills in the default headers.
            response = opener.open(request)
            self.addCleanup(response.close)
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent', 'Test-Agent')
            response = opener.open(request)
            self.addCleanup(response.close)
            self.assertEqual(request.get_header('User-agent'), 'Test-Agent')

    def test_sites_no_connection_close(self):
        # Some sites do not send Connection: close header.
        # Verify that those work properly. (#issue12576)

        URL = 'http://www.imdb.com'  # mangles Connection:close

        with support.transient_internet(URL):
            try:
                with urllib.request.urlopen(URL) as res:
                    pass
            except ValueError:
                self.fail("urlopen failed for site not sending "
                          "Connection:close")
            else:
                self.assertTrue(res)

            with urllib.request.urlopen(URL) as req:
                res = req.read()
            self.assertTrue(res)

    @staticmethod
    def _is_timeout(err):
        """Return True if *err* is a socket timeout or wraps one as .reason."""
        return (isinstance(err, socket.timeout) or
                isinstance(getattr(err, "reason", None), socket.timeout))

    def _test_urls(self, urls, handlers, retry=True):
        """Open and read every entry of *urls* with an opener built from
        *handlers*.

        Each entry is either a URL string or a ``(url, data, expected_err)``
        tuple.  When ``expected_err`` is set, an error raised while opening
        must be an instance of it.  Without ``expected_err``, errors raised
        while opening are tolerated (best effort): they are logged at debug
        level and timeouts are reported on stderr.

        If *retry* is true, opening is retried on ``URLError``.
        """
        import time
        import logging
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib.request.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)

        for url in urls:
            if isinstance(url, tuple):
                url, req, expected_err = url
            else:
                req = expected_err = None

            with support.transient_internet(url):
                debug(url)
                try:
                    f = urlopen(url, req, TIMEOUT)
                # urllib.error.URLError is a subclass of OSError, so this
                # single handler sees every failure to open the URL.
                except OSError as err:
                    debug(err)
                    if expected_err:
                        msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                               (expected_err, url, req, type(err), err))
                        self.assertIsInstance(err, expected_err, msg)
                    elif self._is_timeout(err):
                        print("<timeout: %s>" % url, file=sys.stderr)
                else:
                    try:
                        with support.time_out, \
                             support.socket_peer_reset, \
                             support.ioerror_peer_reset:
                            buf = f.read()
                            debug("read %d bytes" % len(buf))
                    except socket.timeout:
                        print("<timeout: %s>" % url, file=sys.stderr)
                    finally:
                        # Close the response however the read ended.
                        f.close()
            debug("******** next url coming up...")
            time.sleep(0.1)

    def _extra_handlers(self):
        """Return the extra opener handlers: one caching FTP handler.

        The handler's cache is cleared when the test finishes.
        """
        handlers = []

        cfh = urllib.request.CacheFTPHandler()
        self.addCleanup(cfh.clear_cache)
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers
251
Christian Heimesbbe741d2008-03-28 10:53:29 +0000252
class TimeoutTest(unittest.TestCase):
    """Check which timeout ends up on the socket behind a urlopen() response.

    Each scenario (no timeout, global default, explicit ``None``, explicit
    value) is run once over HTTP and once over FTP.
    """

    def test_http_basic(self):
        # No global default, no timeout argument: socket has no timeout.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url, timeout=None):
            response = _urlopen_with_retry(url)
            self.addCleanup(response.close)
            sock = response.fp.raw._sock
            self.assertIsNone(sock.gettimeout())

    def test_http_default_timeout(self):
        # The global default in force while opening is picked up.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(url)
                self.addCleanup(response.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            sock = response.fp.raw._sock
            self.assertEqual(sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        # An explicit timeout=None overrides the global default.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(url, timeout=None)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            sock = response.fp.raw._sock
            self.assertIsNone(sock.gettimeout())

    def test_http_timeout(self):
        # An explicit timeout value is applied to the socket.
        url = "http://www.python.org"
        with support.transient_internet(url):
            response = _urlopen_with_retry(url, timeout=120)
            self.addCleanup(response.close)
            sock = response.fp.raw._sock
            self.assertEqual(sock.gettimeout(), 120)

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    # The FTP variants reach the socket through one more ``.fp`` level
    # than the HTTP ones.

    def test_ftp_basic(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST, timeout=None):
            response = _urlopen_with_retry(self.FTP_HOST)
            self.addCleanup(response.close)
            sock = response.fp.fp.raw._sock
            self.assertIsNone(sock.gettimeout())

    def test_ftp_default_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(self.FTP_HOST)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            sock = response.fp.fp.raw._sock
            self.assertEqual(sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(self.FTP_HOST, timeout=None)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            sock = response.fp.fp.raw._sock
            self.assertIsNone(sock.gettimeout())

    def test_ftp_timeout(self):
        with support.transient_internet(self.FTP_HOST):
            response = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.addCleanup(response.close)
            sock = response.fp.fp.raw._sock
            self.assertEqual(sock.gettimeout(), 60)
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000329
Thomas Wouters477c8d52006-05-27 19:21:47 +0000330
@unittest.skipUnless(ssl, "requires SSL support")
class HTTPSTests(unittest.TestCase):
    """HTTPS-specific network tests; skipped when ``ssl`` is unavailable."""

    @unittest.skip("test disabled - test server needed")
    def test_sni(self):
        # Checks that Server Name Indication works, if supported by the
        # OpenSSL linked to.
        # The ssl module itself doesn't have server-side support for SNI,
        # so we rely on a third-party test site.  "XXX" is a placeholder
        # until such a site is available.
        sni_supported = ssl.HAS_SNI
        with support.transient_internet("XXX"):
            response = urllib.request.urlopen("XXX")
            page = response.readall()
            if not sni_supported:
                self.assertNotIn(b"Great", page)
                self.assertIn(b"Unfortunately", page)
            else:
                self.assertIn(b"Great", page)
                self.assertNotIn(b"Unfortunately", page)
350
351
def test_main():
    """Run every test class of this module.

    Asks the test-support layer for the "network" resource first, then
    hands all five test classes to its unittest runner.
    """
    support.requires("network")
    test_classes = (
        AuthTests,
        HTTPSTests,
        OtherNetworkTests,
        CloseSocketTest,
        TimeoutTest,
    )
    support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()