#!/usr/bin/env python
# Source blob: 4d73ac0acf9f4aff330b34a5a5453a57755357e2

import unittest
from test import test_support
from test.test_urllib2 import sanepathname2url

import socket
import urllib2
import os
import sys

# Timeout, in seconds, handed to the opener for each URL in _test_urls().
TIMEOUT = 60


Facundo Batista6a5a1772008-06-07 13:36:36 +000015def _retry_thrice(func, exc, *args, **kwargs):
Neal Norwitz769d0ee2008-01-25 06:37:23 +000016 for i in range(3):
17 try:
Facundo Batista6a5a1772008-06-07 13:36:36 +000018 return func(*args, **kwargs)
19 except exc, last_exc:
Neal Norwitz769d0ee2008-01-25 06:37:23 +000020 continue
21 except:
22 raise
23 raise last_exc
24
def _wrap_with_retry_thrice(func, exc):
    """Return a callable that runs ``func`` through ``_retry_thrice``.

    The returned callable forwards its positional and keyword arguments
    to ``func`` and retries on ``exc``.
    """
    def wrapped(*args, **kwargs):
        return _retry_thrice(func, exc, *args, **kwargs)

    return wrapped

# Connections to remote hosts are flaky, so the tests open URLs through a
# wrapper that retries urllib2.urlopen() when it raises urllib2.URLError.
_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen,
                                              urllib2.URLError)


class AuthTests(unittest.TestCase):
    """Tests urllib2 authentication features."""

## Disabled for now: there is no page under python.org that could be used
## to exercise HTTP authentication.
#
#    def test_basic_auth(self):
#        import httplib
#
#        test_url = "http://www.python.org/test/test_urllib2/basic_auth"
#        test_hostport = "www.python.org"
#        test_realm = 'Test Realm'
#        test_user = 'test.test_urllib2net'
#        test_password = 'blah'
#
#        # failure
#        try:
#            _urlopen_with_retry(test_url)
#        except urllib2.HTTPError, exc:
#            self.assertEqual(exc.code, 401)
#        else:
#            self.fail("urlopen() should have failed with 401")
#
#        # success
#        auth_handler = urllib2.HTTPBasicAuthHandler()
#        auth_handler.add_password(test_realm, test_hostport,
#                                  test_user, test_password)
#        opener = urllib2.build_opener(auth_handler)
#        f = opener.open('http://localhost/')
#        response = _urlopen_with_retry("http://www.python.org/")
#
#        # The 'userinfo' URL component is deprecated by RFC 3986 for security
#        # reasons, let's not implement it!  (it's already implemented for proxy
#        # specification strings (that is, URLs or authorities specifying a
#        # proxy), so we must keep that)
#        self.assertRaises(httplib.InvalidURL,
#                          urllib2.urlopen, "http://evil:thing@example.com")


class CloseSocketTest(unittest.TestCase):

    # Closing a urllib2 response object is expected to close the socket
    # file object that sits underneath it.
    def test_close(self):
        import httplib

        response = _urlopen_with_retry("http://www.python.org/")

        # Peel the wrappers off one layer at a time, checking the exact
        # class of each layer on the way down.
        outer_file = response.fp
        self.assertTrue(outer_file.__class__ is socket._fileobject)
        http_response = outer_file._sock
        self.assertTrue(http_response.__class__ is httplib.HTTPResponse)
        socket_file = http_response.fp
        self.assertTrue(socket_file.__class__ is socket._fileobject)

        # Open before close(), closed afterwards.
        self.assertTrue(not socket_file.closed)
        response.close()
        self.assertTrue(socket_file.closed)

class OtherNetworkTests(unittest.TestCase):
    """Smoke tests that open ftp:, file: and http: URLs through urllib2."""

    def setUp(self):
        if 0:  # for debugging
            import logging
            # NOTE(review): _test_urls() logs to "test_urllib2", not
            # "test_urllib2net" -- confirm which logger name is intended.
            logger = logging.getLogger("test_urllib2net")
            logger.addHandler(logging.StreamHandler())

    # XXX The rest of these tests aren't very good -- they don't check much.
    # They do sometimes catch some major disasters, though.

    def test_ftp(self):
        urls = [
            'ftp://ftp.kernel.org/pub/linux/kernel/README',
            'ftp://ftp.kernel.org/pub/linux/kernel/non-existent-file',
            #'ftp://ftp.kernel.org/pub/leenox/kernel/test',
            'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC'
                '/research-reports/00README-Legal-Rules-Regs',
            ]
        self._test_urls(urls, self._extra_handlers())

    def test_file(self):
        TESTFN = test_support.TESTFN
        f = open(TESTFN, 'w')
        try:
            # The file must be closed before it is read back and removed,
            # even if the write itself fails.
            with f:
                f.write('hi there\n')
            urls = [
                'file:'+sanepathname2url(os.path.abspath(TESTFN)),
                ('file:///nonsensename/etc/passwd', None, urllib2.URLError),
                ]
            self._test_urls(urls, self._extra_handlers(), retry=True)
        finally:
            os.remove(TESTFN)

        self.assertRaises(ValueError, urllib2.urlopen,
                          './relative_path/to/file')

    # XXX Following test depends on machine configurations that are internal
    # to CNRI.  Need to set up a public server with the right authentication
    # configuration for test purposes.

##     def test_cnri(self):
##         if socket.gethostname() == 'bitdiddle':
##             localhost = 'bitdiddle.cnri.reston.va.us'
##         elif socket.gethostname() == 'bitdiddle.concentric.net':
##             localhost = 'localhost'
##         else:
##             localhost = None
##         if localhost is not None:
##             urls = [
##                 'file://%s/etc/passwd' % localhost,
##                 'http://%s/simple/' % localhost,
##                 'http://%s/digest/' % localhost,
##                 'http://%s/not/found.h' % localhost,
##                 ]

##             bauth = HTTPBasicAuthHandler()
##             bauth.add_password('basic_test_realm', localhost, 'jhylton',
##                                'password')
##             dauth = HTTPDigestAuthHandler()
##             dauth.add_password('digest_test_realm', localhost, 'jhylton',
##                                'password')

##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])

    def test_urlwithfrag(self):
        # geturl() on the response should still carry the #fragment.
        urlwith_frag = "http://docs.python.org/2/glossary.html#glossary"
        with test_support.transient_internet(urlwith_frag):
            req = urllib2.Request(urlwith_frag)
            res = urllib2.urlopen(req)
            try:
                self.assertEqual(res.geturl(),
                        "http://docs.python.org/2/glossary.html#glossary")
            finally:
                res.close()

    def test_fileno(self):
        url = "http://www.python.org"
        # Guarded like the other tests in this class.
        with test_support.transient_internet(url):
            req = urllib2.Request(url)
            opener = urllib2.build_opener()
            res = opener.open(req)
            try:
                res.fileno()
            except AttributeError:
                self.fail("HTTPResponse object should return a valid fileno")
            finally:
                res.close()

    def test_custom_headers(self):
        url = "http://www.example.com"
        with test_support.transient_internet(url):
            opener = urllib2.build_opener()
            request = urllib2.Request(url)
            self.assertFalse(request.header_items())
            # Only the effect on ``request`` is checked, so each response
            # is closed as soon as it has been opened.
            opener.open(request).close()
            self.assertTrue(request.header_items())
            self.assertTrue(request.has_header('User-agent'))
            request.add_header('User-Agent','Test-Agent')
            opener.open(request).close()
            self.assertEqual(request.get_header('User-agent'),'Test-Agent')

    def test_sites_no_connection_close(self):
        # Some sites do not send Connection: close header.
        # Verify that those work properly. (#issue12576)

        URL = 'http://www.imdb.com' # No Connection:close
        with test_support.transient_internet(URL):
            req = urllib2.urlopen(URL)
            try:
                res = req.read()
            finally:
                req.close()
            self.assertTrue(res)

    def _test_urls(self, urls, handlers, retry=True):
        # Open and read every entry of ``urls`` with an opener built from
        # ``handlers``.
        #
        # An entry is either a URL string or a ``(url, req, expected_err)``
        # tuple.  ``req`` is passed to the opener as its second argument.
        # When ``expected_err`` is set and opening raises an
        # EnvironmentError, the error must be an instance of it.  With
        # ``retry`` true the opener retries on urllib2.URLError.
        import time
        import logging
        debug = logging.getLogger("test_urllib2").debug

        urlopen = urllib2.build_opener(*handlers).open
        if retry:
            urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError)

        for url in urls:
            if isinstance(url, tuple):
                url, req, expected_err = url
            else:
                req = expected_err = None
            with test_support.transient_internet(url):
                debug(url)
                try:
                    f = urlopen(url, req, TIMEOUT)
                except EnvironmentError as err:
                    # urllib2.URLError is a subclass of IOError, so it is
                    # handled here too; a separate URLError clause after
                    # this one could never run.
                    debug(err)
                    if expected_err:
                        msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" %
                               (expected_err, url, req, type(err), err))
                        self.assertIsInstance(err, expected_err, msg)
                else:
                    try:
                        with test_support.transient_internet(url):
                            buf = f.read()
                            debug("read %d bytes" % len(buf))
                    except socket.timeout:
                        print >>sys.stderr, "<timeout: %s>" % url
                    finally:
                        # Close the response however the read ended.
                        f.close()
            debug("******** next url coming up...")
            time.sleep(0.1)

    def _extra_handlers(self):
        # Return the extra handlers for the opener: one CacheFTPHandler
        # whose cache is cleared when the test finishes.
        handlers = []

        cfh = urllib2.CacheFTPHandler()
        self.addCleanup(cfh.clear_cache)
        cfh.setTimeout(1)
        handlers.append(cfh)

        return handlers


class TimeoutTest(unittest.TestCase):
    """Checks the socket timeout in effect after urlopen() on http/ftp URLs.

    Each test opens a URL and reads the timeout back from the socket
    underneath the response.  Every response is registered with
    addCleanup() so it is closed when the test finishes.
    """

    def test_http_basic(self):
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with test_support.transient_internet(url, timeout=None):
            u = _urlopen_with_retry(url)
            self.addCleanup(u.close)
            self.assertIsNone(u.fp._sock.fp._sock.gettimeout())

    def test_http_default_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(url)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 60)

    def test_http_no_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            socket.setdefaulttimeout(60)
            try:
                # An explicit timeout=None must win over the default of 60.
                u = _urlopen_with_retry(url, timeout=None)
                self.addCleanup(u.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertIsNone(u.fp._sock.fp._sock.gettimeout())

    def test_http_timeout(self):
        url = "http://www.python.org"
        with test_support.transient_internet(url):
            u = _urlopen_with_retry(url, timeout=120)
            self.addCleanup(u.close)
            self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120)

    FTP_HOST = "ftp://ftp.mirror.nl/pub/gnu/"

    def test_ftp_basic(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with test_support.transient_internet(self.FTP_HOST, timeout=None):
            u = _urlopen_with_retry(self.FTP_HOST)
            self.addCleanup(u.close)
            self.assertIsNone(u.fp.fp._sock.gettimeout())

    def test_ftp_default_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with test_support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                u = _urlopen_with_retry(self.FTP_HOST)
                self.addCleanup(u.close)
            finally:
                # Always restore the process-wide default.
                socket.setdefaulttimeout(None)
            self.assertEqual(u.fp.fp._sock.gettimeout(), 60)

    def test_ftp_no_timeout(self):
        self.assertIsNone(socket.getdefaulttimeout())
        with test_support.transient_internet(self.FTP_HOST):
            socket.setdefaulttimeout(60)
            try:
                # An explicit timeout=None must win over the default of 60.
                u = _urlopen_with_retry(self.FTP_HOST, timeout=None)
                self.addCleanup(u.close)
            finally:
                socket.setdefaulttimeout(None)
            self.assertIsNone(u.fp.fp._sock.gettimeout())

    def test_ftp_timeout(self):
        with test_support.transient_internet(self.FTP_HOST):
            u = _urlopen_with_retry(self.FTP_HOST, timeout=60)
            self.addCleanup(u.close)
            self.assertEqual(u.fp.fp._sock.gettimeout(), 60)


def test_main():
    """Run the module's test classes after requiring the "network" resource."""
    test_support.requires("network")
    test_classes = (
        AuthTests,
        OtherNetworkTests,
        CloseSocketTest,
        TimeoutTest,
    )
    test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()