blob: 773101ce41f6021d0f735bebbd15515342b8cdc7 [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Hai Shi79bb2c92020-08-06 19:51:29 +08003from test.support import os_helper
Serhiy Storchakabfb1cf42020-04-29 10:36:20 +03004from test.support import socket_helper
Skip Montanaro89feabc2003-03-30 04:54:24 +00005
Antoine Pitroua98d26a2011-05-22 17:35:17 +02006import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00007import socket
Stéphane Wirtela40681d2019-02-22 14:45:36 +01008import urllib.parse
Jeremy Hylton1afc1692008-06-18 20:49:58 +00009import urllib.request
Brett Cannona71319e2003-05-14 02:18:31 +000010import os
Barry Warsaw820c1202008-06-12 04:06:45 +000011import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000012import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000013
Christian Heimesaf98da12008-01-27 15:18:18 +000014
# Every test below opens connections to real hosts.  This call runs at import
# time, before any test class is defined.
# NOTE(review): presumably this makes the whole module get skipped unless the
# 'network' resource is enabled for the test run -- confirm against
# test.support.requires().
support.requires('network')
16
Senthil Kumaran1bd7d292017-05-15 23:08:07 -070017
class URLTimeoutTest(unittest.TestCase):
    """Read support.TEST_HTTP_URL while a default socket timeout is set."""
    # XXX this test doesn't seem to test anything useful.

    def setUp(self):
        # Remember the process-wide default timeout so tearDown() can put it
        # back; resetting to None unconditionally would clobber a value that
        # was configured before this test ran.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(support.INTERNET_TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        domain = urllib.parse.urlparse(support.TEST_HTTP_URL).netloc
        with socket_helper.transient_internet(domain):
            # urlopen() returns a context manager; using it guarantees the
            # response (and its socket) is closed even if read() raises.
            with urllib.request.urlopen(support.TEST_HTTP_URL) as f:
                f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000035
Antoine Pitroua98d26a2011-05-22 17:35:17 +020036
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  They assume that the tests using files
    do a good job overall of covering the basic interface features.  There
    are no tests exercising the optional 'data' and 'proxies' arguments.  No
    tests for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.pythontest.net/ since there are a few tests that don't use
    that address and making a connection is expensive enough to warrant
    minimizing unneeded connections.

    """

    # Address opened by most of the tests below.
    url = 'http://www.pythontest.net/'

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL and yield the response, closing it on exit.

        All arguments are forwarded to urllib.request.urlopen().  The first
        positional argument is additionally handed to
        socket_helper.transient_internet() as the resource name, so the URL
        must be passed positionally.
        """
        resource = args[0]
        with socket_helper.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen(self.url) as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen(self.url) as open_url:
            # The response is a binary stream, so a line is a bytes object;
            # the failure message says so to match the type actually checked.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen(self.url) as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        with self.urlopen(self.url) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, self.url)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = self.url + "XXXinvalidXXX"
        with socket_helper.transient_internet(URL):
            # Only the open() call is expected to emit the DeprecationWarning,
            # so nothing else is placed inside the assertWarns block.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.

        # Given that both VeriSign and various ISPs have in
        # the past or are presently hijacking various invalid
        # domain name requests in an attempt to boost traffic
        # to their own sites, finding a domain name to use
        # for this test is difficult.  RFC2606 leads one to
        # believe that '.invalid' should work, but experience
        # seemed to indicate otherwise.  Single character
        # TLDs are likely to remain invalid, so this seems to
        # be the best choice.  The trailing '.' prevents a
        # related problem: The normal DNS resolver appends
        # the domain names from the search path if there is
        # no '.' at the end, and if one of those domains
        # implements a '*' rule a result is returned.
        # However, none of this will prevent the test from
        # failing if the ISP hijacks all invalid domain
        # requests.  The real solution would be to be able to
        # parameterize the framework with a mock resolver.
        bogus_domain = "sadflkjsasf.i.nvali.d."
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # If the open unexpectedly succeeds (hijacked DNS), close the
            # response right away so the resulting test failure does not
            # also leak an open socket.
            urllib.request.urlopen("http://{}/".format(bogus_domain)).close()
Brett Cannona71319e2003-05-14 02:18:31 +0000149
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200150
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    # Resource downloaded by every test in this class.  Note that it is the
    # site's root URL rather than an image, despite the attribute name.
    logo = "http://www.pythontest.net/"

    def setUp(self):
        # remove temporary files created by urlretrieve()
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL and yield (file_location, info); unlink on exit.

        All arguments are forwarded to urllib.request.urlretrieve().  The
        first positional argument is additionally handed to
        socket_helper.transient_internet() as the resource name, so the URL
        must be passed positionally.
        """
        resource = args[0]
        with socket_helper.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                os_helper.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve(self.logo,
                              os_helper.TESTFN) as (file_location, info):
            self.assertEqual(file_location, os_helper.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    def test_data_header(self):
        # Make sure the Date header is present and in the expected format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # get() returns None for a missing header, and strptime(None, ...)
            # raises TypeError rather than ValueError; check explicitly so a
            # missing header is reported as a failure, not an error.
            self.assertIsNotNone(datevalue, "response has no Date header")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Make sure the reporthook is called with consistent arguments.
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            content_length = fileheaders['Content-Length']
            # A missing header is returned as None; fail with a clear message
            # instead of letting int(None) raise TypeError.
            self.assertIsNotNone(content_length,
                                 "response has no Content-Length header")
            expected_size = int(content_length)

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
228
Brett Cannona71319e2003-05-14 02:18:31 +0000229
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()