blob: fc2140809ed55ac18efdfef929d5de6a5d9bf07b [file] [log] [blame]
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Thomas Wouters477c8d52006-05-27 19:21:47 +00003from test.test_urllib2 import sanepathname2url
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00004
Jeremy Hylton5d9c3032004-08-07 17:40:50 +00005import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00007import urllib.error
8import urllib.request
Senthil Kumaranb8f7ea62010-04-20 10:35:49 +00009import sys
Berker Peksagb77983d2014-10-10 14:34:16 +030010
# Declare this module's dependency on the "network" test resource (see
# test.support.requires for what happens when that resource is not enabled).
support.requires("network")

# Timeout, in seconds, passed to each open() call made by
# OtherNetworkTests._test_urls().
TIMEOUT = 60
Jeremy Hylton5d9c3032004-08-07 17:40:50 +000014
Christian Heimes969fe572008-01-25 11:23:10 +000015
Georg Brandlc28e1fa2008-06-10 19:20:26 +000016def _retry_thrice(func, exc, *args, **kwargs):
Christian Heimes969fe572008-01-25 11:23:10 +000017 for i in range(3):
18 try:
Georg Brandlc28e1fa2008-06-10 19:20:26 +000019 return func(*args, **kwargs)
20 except exc as e:
Neal Norwitz2f142582008-01-26 19:49:41 +000021 last_exc = e
Christian Heimes969fe572008-01-25 11:23:10 +000022 continue
23 except:
24 raise
25 raise last_exc
26
def _wrap_with_retry_thrice(func, exc):
    """Return a callable that runs *func* through ``_retry_thrice``.

    The returned callable forwards its positional and keyword arguments to
    *func*, with *exc* as the exception type that triggers a retry.
    """
    def wrapped(*call_args, **call_kwargs):
        result = _retry_thrice(func, exc, *call_args, **call_kwargs)
        return result

    return wrapped
31
# Connections to remote hosts fail intermittently, so the tests open URLs
# through this wrapper, which retries urlopen() when it raises URLError.
_urlopen_with_retry = _wrap_with_retry_thrice(
    urllib.request.urlopen,
    urllib.error.URLError,
)
Christian Heimes969fe572008-01-25 11:23:10 +000036
Thomas Wouters477c8d52006-05-27 19:21:47 +000037
class AuthTests(unittest.TestCase):
    """Tests urllib2 authentication features."""

## Disabled at the moment: there is no page under python.org that could be
## used to exercise HTTP authentication.  The code below is kept for
## reference only.  It still uses the Python 2 ``urllib2`` names and the
## ``except X, exc`` syntax, so it has to be ported before being re-enabled.
#
#    def test_basic_auth(self):
#        import http.client
#
#        test_url = "http://www.python.org/test/test_urllib2/basic_auth"
#        test_hostport = "www.python.org"
#        test_realm = 'Test Realm'
#        test_user = 'test.test_urllib2net'
#        test_password = 'blah'
#
#        # failure
#        try:
#            _urlopen_with_retry(test_url)
#        except urllib2.HTTPError, exc:
#            self.assertEqual(exc.code, 401)
#        else:
#            self.fail("urlopen() should have failed with 401")
#
#        # success
#        auth_handler = urllib2.HTTPBasicAuthHandler()
#        auth_handler.add_password(test_realm, test_hostport,
#                                  test_user, test_password)
#        opener = urllib2.build_opener(auth_handler)
#        f = opener.open('http://localhost/')
#        response = _urlopen_with_retry("http://www.python.org/")
#
#        # The 'userinfo' URL component is deprecated by RFC 3986 for security
#        # reasons, let's not implement it!  (it's already implemented for proxy
#        # specification strings (that is, URLs or authorities specifying a
#        # proxy), so we must keep that)
#        self.assertRaises(http.client.InvalidURL,
#                          urllib2.urlopen, "http://evil:thing@example.com")
75
76
class CloseSocketTest(unittest.TestCase):

    def test_close(self):
        # Closing a urlopen() response object should also close the
        # underlying socket; this is checked through the "closed" flag of
        # the object the response exposes as .fp.
        target = "http://www.example.com/"
        with support.transient_internet(target):
            resp = _urlopen_with_retry(target)
            underlying = resp.fp
            # Open before close() ...
            self.assertFalse(underlying.closed)
            resp.close()
            # ... and closed afterwards.
            self.assertTrue(underlying.closed)
Thomas Woutersb2137042007-02-01 18:02:27 +000089
class OtherNetworkTests(unittest.TestCase):
    """Smoke tests that open real ftp:, file: and http: URLs via urllib."""

    def setUp(self):
        if 0:  # for debugging
            import logging
            # NOTE(review): _test_urls() logs to "test_urllib2" while this
            # handler is attached to "test_urllib2net" -- confirm which name
            # is intended before enabling this branch.
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.debian.org/debian/README',
            ('ftp://ftp.debian.org/debian/non-existent-file',
             None, urllib.error.URLError),
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
                '/research-reports/00README-Legal-Rules-Regs',
            ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = support.TESTFN
        f = open(TESTFN, 'w')
        try:
            # The with-block closes the handle even if write() fails, before
            # the file is removed in the finally clause.
            with f:
                f.write('hi there\n')
            urls = [
                'file:' + sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None,
                 urllib.error.URLError),
                ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

        self.assertRaises(ValueError, urllib.request.urlopen,
                          './relative_path/to/file')

    # XXX Following test depends on machine configurations that are internal
    # to CNRI.  Need to set up a public server with the right authentication
    # configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    def test_urlwithfrag(self):
        urlwith_frag = "http://www.pythontest.net/index.html#frag"
        with support.transient_internet(urlwith_frag):
            req = urllib.request.Request(urlwith_frag)
            # Use the response as a context manager so it is always closed.
            with urllib.request.urlopen(req) as res:
                self.assertEqual(res.geturl(),
                        "http://www.pythontest.net/index.html#frag")

    def test_redirect_url_withfrag(self):
        redirect_url_with_frag = "http://www.pythontest.net/redir/with_frag/"
        with support.transient_internet(redirect_url_with_frag):
            req = urllib.request.Request(redirect_url_with_frag)
            with urllib.request.urlopen(req) as res:
                self.assertEqual(res.geturl(),
                        "http://www.pythontest.net/elsewhere/#frag")

    def test_custom_headers(self):
        url = "http://www.example.com"
        with support.transient_internet(url):
            opener = urllib.request.build_opener()
            request = urllib.request.Request(url)
            self.assertFalse(request.header_items())
            # Only the effect of open() on the request's headers matters
            # here, so each response is closed straight away.
            opener.open(request).close()
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent', 'Test-Agent')
            opener.open(request).close()
            self.assertEqual(request.get_header('User-agent'), 'Test-Agent')

    def test_sites_no_connection_close(self):
        # Some sites do not send Connection: close header.
        # Verify that those work properly. (#issue12576)

        URL = 'http://www.imdb.com'  # mangles Connection:close

        with support.transient_internet(URL):
            try:
                with urllib.request.urlopen(URL) as res:
                    pass
            except ValueError:
                # Adjacent literals instead of a backslash continuation, so
                # the message does not pick up the source indentation.
                self.fail("urlopen failed for site not sending "
                          "Connection:close")
            else:
                self.assertTrue(res)

            with urllib.request.urlopen(URL) as req:
                res = req.read()
            self.assertTrue(res)

    def _test_urls(self, urls, handlers, retry=True):
        """Open each entry of *urls* and read the whole response.

        Each entry is either a URL string or a 3-tuple
        ``(url, req, expected_err)``; *req* is passed as the second
        positional argument to the opener's ``open()``.  The opener is built
        from *handlers*; when *retry* is true, ``open()`` is retried on
        ``URLError`` through ``_wrap_with_retry_thrice``.

        If opening raises ``OSError`` and an expected error was given, the
        error must be an instance of it; without an expected error the
        exception is re-raised.  A ``socket.timeout`` while reading is only
        reported on stderr.
        """
        import time
        import logging
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib.request.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)

        for url in urls:
            with self.subTest(url=url):
                if isinstance(url, tuple):
                    url, req, expected_err = url
                else:
                    req = expected_err = None

                with support.transient_internet(url):
                    try:
                        f = urlopen(url, req, TIMEOUT)
                    # urllib.error.URLError is a subclass of OSError
                    except OSError as err:
                        if expected_err:
                            msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                                   (expected_err, url, req, type(err), err))
                            self.assertIsInstance(err, expected_err, msg)
                        else:
                            raise
                    else:
                        try:
                            with support.time_out, \
                                 support.socket_peer_reset, \
                                 support.ioerror_peer_reset:
                                buf = f.read()
                                debug("read %d bytes", len(buf))
                        except socket.timeout:
                            print("<timeout: %s>" % url, file=sys.stderr)
                        finally:
                            # Close the response whatever read() raised.
                            f.close()
                time.sleep(0.1)

    def _extra_handlers(self):
        """Return the extra opener handlers used by the URL tests.

        Currently a single ``CacheFTPHandler`` with its timeout set to 1;
        its ``clear_cache`` method is registered as a test cleanup.
        """
        handlers = []

        cfh = urllib.request.CacheFTPHandler()
        self.addCleanup(cfh.clear_cache)
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers
252
Christian Heimesbbe741d2008-03-28 10:53:29 +0000253
class TimeoutTest(unittest.TestCase):
    """Checks the timeout of the socket behind responses from urlopen().

    Every test opens a URL and inspects ``gettimeout()`` on the socket
    reached through the response's private attributes.
    """

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    def _open_with_default_timeout(self, url, **open_kwargs):
        """Open *url* while the global default socket timeout is 60.

        The default timeout is reset to None afterwards, whether or not the
        open succeeded.  The response is registered for closing at test
        cleanup and returned.
        """
        socket.setdefaulttimeout(60)
        try:
            response = _urlopen_with_retry(url, **open_kwargs)
            self.addCleanup(response.close)
        finally:
            socket.setdefaulttimeout(None)
        return response

    # --- http ---------------------------------------------------------

    def test_http_basic(self):
        # No default timeout and none requested: the socket has none.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.example.com"
        with support.transient_internet(url, timeout=None):
            response = _urlopen_with_retry(url)
            self.addCleanup(response.close)
            self.assertIsNone(response.fp.raw._sock.gettimeout())

    def test_http_default_timeout(self):
        # Without an explicit timeout the global default is picked up.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.example.com"
        with support.transient_internet(url):
            response = self._open_with_default_timeout(url)
            self.assertEqual(response.fp.raw._sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        # An explicit timeout=None overrides the global default.
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.example.com"
        with support.transient_internet(url):
            response = self._open_with_default_timeout(url, timeout=None)
            self.assertIsNone(response.fp.raw._sock.gettimeout())

    def test_http_timeout(self):
        # An explicit timeout is applied to the socket.
        url = "http://www.example.com"
        with support.transient_internet(url):
            response = _urlopen_with_retry(url, timeout=120)
            self.addCleanup(response.close)
            self.assertEqual(response.fp.raw._sock.gettimeout(), 120)

    # --- ftp (the socket sits one .fp level deeper) ---------------------

    def test_ftp_basic(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST, timeout=None):
            response = _urlopen_with_retry(self.FTP_HOST)
            self.addCleanup(response.close)
            self.assertIsNone(response.fp.fp.raw._sock.gettimeout())

    def test_ftp_default_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            response = self._open_with_default_timeout(self.FTP_HOST)
            self.assertEqual(response.fp.fp.raw._sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with support.transient_internet(self.FTP_HOST):
            response = self._open_with_default_timeout(self.FTP_HOST,
                                                       timeout=None)
            self.assertIsNone(response.fp.fp.raw._sock.gettimeout())

    def test_ftp_timeout(self):
        with support.transient_internet(self.FTP_HOST):
            response = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.addCleanup(response.close)
            self.assertEqual(response.fp.fp.raw._sock.gettimeout(), 60)
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000330
Thomas Wouters477c8d52006-05-27 19:21:47 +0000331
# Run the tests with unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()