blob: 6f78cea1b79f034100d8e42376a1297163582bea [file] [log] [blame]
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Thomas Wouters477c8d52006-05-27 19:21:47 +00003from test.test_urllib2 import sanepathname2url
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00004
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00005import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00007import urllib.error
8import urllib.request
Senthil Kumaranb8f7ea62010-04-20 10:35:49 +00009import sys
Berker Peksagb77983d2014-10-10 14:34:16 +030010
# ssl is optional; when it cannot be imported the name is bound to None so
# the decorator below can test for it.
try:
    import ssl
except ImportError:
    ssl = None

# Decorator that skips a test when the ssl module is unavailable.
requires_ssl = unittest.skipUnless(ssl is not None, "SSL not supported")

# The tests in this module need the "network" resource.
support.requires("network")

# Timeout, in seconds, handed to the opener in OtherNetworkTests._test_urls.
TIMEOUT = 60
Jeremy Hylton5d9c3032004-08-07 17:40:50 +000021
Christian Heimes969fe572008-01-25 11:23:10 +000022
Georg Brandlc28e1fa2008-06-10 19:20:26 +000023def _retry_thrice(func, exc, *args, **kwargs):
Christian Heimes969fe572008-01-25 11:23:10 +000024 for i in range(3):
25 try:
Georg Brandlc28e1fa2008-06-10 19:20:26 +000026 return func(*args, **kwargs)
27 except exc as e:
Neal Norwitz2f142582008-01-26 19:49:41 +000028 last_exc = e
Christian Heimes969fe572008-01-25 11:23:10 +000029 continue
30 except:
31 raise
32 raise last_exc
33
def _wrap_with_retry_thrice(func, exc):
    """Return a callable that runs *func* through _retry_thrice.

    The returned callable accepts arbitrary positional and keyword
    arguments and forwards them, together with *func* and *exc*, to
    _retry_thrice.
    """
    def retrying(*call_args, **call_kwargs):
        return _retry_thrice(func, exc, *call_args, **call_kwargs)

    return retrying
38
# Connections to remote hosts are flaky, so make them more robust: this
# variant of urllib.request.urlopen is retried when it raises URLError.
_urlopen_with_retry = _wrap_with_retry_thrice(
    urllib.request.urlopen,
    urllib.error.URLError,
)
Christian Heimes969fe572008-01-25 11:23:10 +000043
Thomas Wouters477c8d52006-05-27 19:21:47 +000044
class AuthTests(unittest.TestCase):
    """Placeholder for HTTP authentication tests; none are enabled."""

    ## Disabled at the moment since there is no page under python.org which
    ## could be used to test HTTP authentication.
    #
    #    def test_basic_auth(self):
    #        import http.client
    #
    #        test_url = "http://www.python.org/test/test_urllib2/basic_auth"
    #        test_hostport = "www.python.org"
    #        test_realm = 'Test Realm'
    #        test_user = 'test.test_urllib2net'
    #        test_password = 'blah'
    #
    #        # failure
    #        try:
    #            _urlopen_with_retry(test_url)
    #        except urllib2.HTTPError, exc:
    #            self.assertEqual(exc.code, 401)
    #        else:
    #            self.fail("urlopen() should have failed with 401")
    #
    #        # success
    #        auth_handler = urllib2.HTTPBasicAuthHandler()
    #        auth_handler.add_password(test_realm, test_hostport,
    #                                  test_user, test_password)
    #        opener = urllib2.build_opener(auth_handler)
    #        f = opener.open('http://localhost/')
    #        response = _urlopen_with_retry("http://www.python.org/")
    #
    #        # The 'userinfo' URL component is deprecated by RFC 3986 for security
    #        # reasons, let's not implement it!  (it's already implemented for proxy
    #        # specification strings (that is, URLs or authorities specifying a
    #        # proxy), so we must keep that)
    #        self.assertRaises(http.client.InvalidURL,
    #                          urllib2.urlopen, "http://evil:thing@example.com")
82
83
class CloseSocketTest(unittest.TestCase):

    def test_close(self):
        # Closing a response returned by urlopen should also close the
        # underlying object it exposes as .fp.
        url = "http://www.example.com/"
        with support.transient_internet(url):
            resp = _urlopen_with_retry(url)
            underlying = resp.fp
            self.assertFalse(underlying.closed)
            resp.close()
            self.assertTrue(underlying.closed)
Thomas Woutersb2137042007-02-01 18:02:27 +000096
class OtherNetworkTests(unittest.TestCase):
    """Smoke tests that open ftp:, file: and http(s): URLs."""

    def setUp(self):
        if 0:  # for debugging
            import logging
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.debian.org/debian/README',
            ('ftp://ftp.debian.org/debian/non-existent-file',
             None, urllib.error.URLError),
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
                '/research-reports/00README-Legal-Rules-Regs',
            ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = support.TESTFN
        f = open(TESTFN, 'w')
        try:
            # "with" closes the file even if write() fails, so the
            # os.remove() below never runs against an open handle.
            with f:
                f.write('hi there\n')
            urls = [
                'file:' + sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None,
                 urllib.error.URLError),
                ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

        self.assertRaises(ValueError, urllib.request.urlopen,
                          './relative_path/to/file')

    # XXX Following test depends on machine configurations that are internal
    # to CNRI.  Need to set up a public server with the right authentication
    # configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    @requires_ssl
    def test_urlwithfrag(self):
        # The fragment must survive in the URL reported by the response.
        urlwith_frag = "https://docs.python.org/2/glossary.html#glossary"
        with support.transient_internet(urlwith_frag):
            req = urllib.request.Request(urlwith_frag)
            res = urllib.request.urlopen(req)
            self.addCleanup(res.close)
            self.assertEqual(res.geturl(),
                    "https://docs.python.org/2/glossary.html#glossary")

    @requires_ssl
    def test_redirect_url_withfrag(self):
        # The fragment of the redirect target must be kept as well.
        redirect_url_with_frag = "http://bit.ly/1iSHToT"
        with support.transient_internet(redirect_url_with_frag):
            req = urllib.request.Request(redirect_url_with_frag)
            res = urllib.request.urlopen(req)
            self.addCleanup(res.close)
            self.assertEqual(res.geturl(),
                    "https://docs.python.org/3.4/glossary.html#term-global-interpreter-lock")

    def test_custom_headers(self):
        url = "http://www.example.com"
        with support.transient_internet(url):
            opener = urllib.request.build_opener()
            request = urllib.request.Request(url)
            self.assertFalse(request.header_items())
            # Opening the request is what fills in the default headers.
            first = opener.open(request)
            self.addCleanup(first.close)
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent', 'Test-Agent')
            second = opener.open(request)
            self.addCleanup(second.close)
            self.assertEqual(request.get_header('User-agent'), 'Test-Agent')

    def test_sites_no_connection_close(self):
        # Some sites do not send Connection: close header.
        # Verify that those work properly. (#issue12576)

        URL = 'http://www.imdb.com'  # mangles Connection:close

        with support.transient_internet(URL):
            try:
                with urllib.request.urlopen(URL) as res:
                    pass
            except ValueError:
                self.fail("urlopen failed for site not sending "
                          "Connection:close")
            else:
                self.assertTrue(res)

            # Also exercise a plain read() outside a "with" block.
            req = urllib.request.urlopen(URL)
            self.addCleanup(req.close)
            res = req.read()
            self.assertTrue(res)

    def _test_urls(self, urls, handlers, retry=True):
        # Open every entry of *urls* with an opener built from *handlers*.
        #
        # An entry is either a URL string or a 3-tuple whose items are the
        # URL, the second positional argument for the opener, and the
        # exception type(s) the open is allowed to fail with.  When *retry*
        # is true the opener is retried on URLError.
        import time
        import logging
        # NOTE(review): setUp attaches its debug handler to the
        # "test_urllib2net" logger, not "test_urllib2" -- confirm which
        # name is intended.
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib.request.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)

        for url in urls:
            with self.subTest(url=url):
                if isinstance(url, tuple):
                    url, req, expected_err = url
                else:
                    req = expected_err = None

                with support.transient_internet(url):
                    try:
                        f = urlopen(url, req, TIMEOUT)
                    # urllib.error.URLError is a subclass of OSError
                    except OSError as err:
                        if expected_err:
                            msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                                   (expected_err, url, req, type(err), err))
                            self.assertIsInstance(err, expected_err, msg)
                        else:
                            raise
                    else:
                        try:
                            with support.time_out, \
                                 support.socket_peer_reset, \
                                 support.ioerror_peer_reset:
                                buf = f.read()
                                debug("read %d bytes" % len(buf))
                        except socket.timeout:
                            print("<timeout: %s>" % url, file=sys.stderr)
                        finally:
                            # Close the response whatever read() did.
                            f.close()
                time.sleep(0.1)

    def _extra_handlers(self):
        # Return the extra handlers for _test_urls: a CacheFTPHandler whose
        # cache is cleared when the test finishes.
        handlers = []

        cfh = urllib.request.CacheFTPHandler()
        self.addCleanup(cfh.clear_cache)
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers
261
Christian Heimesbbe741d2008-03-28 10:53:29 +0000262
class TimeoutTest(unittest.TestCase):
    """Check the timeout set on the socket behind an opened URL."""

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    # The HTTP tests read the timeout from response.fp.raw._sock.

    def test_http_basic(self):
        url = "http://www.example.com"
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(url, timeout=None):
            response = _urlopen_with_retry(url)
            self.addCleanup(response.close)
            self.assertIsNone(response.fp.raw._sock.gettimeout())

    def test_http_default_timeout(self):
        url = "http://www.example.com"
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(url):
            # Set the default inside the "with" block, as the original does.
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(url)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertEqual(response.fp.raw._sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        url = "http://www.example.com"
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                # An explicit timeout=None is expected to win over the
                # default set just above.
                response = _urlopen_with_retry(url, timeout=None)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertIsNone(response.fp.raw._sock.gettimeout())

    def test_http_timeout(self):
        url = "http://www.example.com"
        with support.transient_internet(url):
            response = _urlopen_with_retry(url, timeout=120)
            self.addCleanup(response.close)
            self.assertEqual(response.fp.raw._sock.gettimeout(), 120)

    # The FTP tests read the timeout from response.fp.fp.raw._sock.

    def test_ftp_basic(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST, timeout=None):
            response = _urlopen_with_retry(self.FTP_HOST)
            self.addCleanup(response.close)
            self.assertIsNone(response.fp.fp.raw._sock.gettimeout())

    def test_ftp_default_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(self.FTP_HOST)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertEqual(response.fp.fp.raw._sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                response = _urlopen_with_retry(self.FTP_HOST, timeout=None)
                self.addCleanup(response.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertIsNone(response.fp.fp.raw._sock.gettimeout())

    def test_ftp_timeout(self):
        with support.transient_internet(self.FTP_HOST):
            response = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.addCleanup(response.close)
            self.assertEqual(response.fp.fp.raw._sock.gettimeout(), 60)
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000339
Thomas Wouters477c8d52006-05-27 19:21:47 +0000340
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()