blob: 422d529a70074e8c2ded5077b520203d8cc330a3 [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00003
Antoine Pitroua98d26a2011-05-22 17:35:17 +02004import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00005import socket
Stéphane Wirtela40681d2019-02-22 14:45:36 +01006import urllib.parse
Jeremy Hylton1afc1692008-06-18 20:49:58 +00007import urllib.request
Brett Cannona71319e2003-05-14 02:18:31 +00008import os
Barry Warsaw820c1202008-06-12 04:06:45 +00009import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000010import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000011
Christian Heimesaf98da12008-01-27 15:18:18 +000012
# The tests in this module open real HTTP connections, so the 'network'
# resource is requested once for the whole module at import time.
# NOTE(review): test.support.requires() is expected to raise ResourceDenied
# when the resource is not enabled -- confirm against the test.support docs.
support.requires('network')
14
Senthil Kumaran1bd7d292017-05-15 23:08:07 -070015
class URLTimeoutTest(unittest.TestCase):
    """Read a URL while a global default socket timeout is in effect."""
    # XXX this test doesn't seem to test anything useful.

    def setUp(self):
        # Remember whatever default timeout was active before the test so
        # that tearDown() can put it back instead of blindly clearing it.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(support.INTERNET_TIMEOUT)

    def tearDown(self):
        # Restore the pre-test default timeout (None when none was set).
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        domain = urllib.parse.urlparse(support.TEST_HTTP_URL).netloc
        with support.transient_internet(domain):
            # Use the response as a context manager so the connection is
            # closed deterministically rather than left for the garbage
            # collector (which would emit a ResourceWarning).
            with urllib.request.urlopen(support.TEST_HTTP_URL) as f:
                f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000033
Antoine Pitroua98d26a2011-05-22 17:35:17 +020034
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.pythontest.net/ since there are a few tests that don't use
    that address and making a connection is expensive enough to warrant
    minimizing unneeded connections.

    """

    url = 'http://www.pythontest.net/'

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL and yield the response, closing it on exit.

        All arguments are forwarded unchanged to urllib.request.urlopen().
        The first positional argument is additionally handed to
        support.transient_internet() as the resource being accessed.
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen(self.url) as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen(self.url) as open_url:
            # The response is a binary stream, so a line is a bytes object.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen(self.url) as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        with self.urlopen(self.url) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, self.url)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = self.url + "XXXinvalidXXX"
        with support.transient_internet(URL):
            # Only the deprecated opener call is expected to warn, so keep
            # the assertWarns block as narrow as possible.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
        self.assertEqual(code, 404)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.

        # Given that both VeriSign and various ISPs have in
        # the past or are presently hijacking various invalid
        # domain name requests in an attempt to boost traffic
        # to their own sites, finding a domain name to use
        # for this test is difficult.  RFC2606 leads one to
        # believe that '.invalid' should work, but experience
        # seemed to indicate otherwise.  Single character
        # TLDs are likely to remain invalid, so this seems to
        # be the best choice.  The trailing '.' prevents a
        # related problem: The normal DNS resolver appends
        # the domain names from the search path if there is
        # no '.' at the end, and if one of those domains
        # implements a '*' rule a result is returned.
        # However, none of this will prevent the test from
        # failing if the ISP hijacks all invalid domain
        # requests.  The real solution would be to be able to
        # parameterize the framework with a mock resolver.
        bogus_domain = "sadflkjsasf.i.nvali.d."
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # If the open unexpectedly succeeds, close the response right
            # away so the failing test does not also leak a socket.
            urllib.request.urlopen("http://{}/".format(bogus_domain)).close()
Brett Cannona71319e2003-05-14 02:18:31 +0000147
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200148
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    # URL downloaded by every test in this class.
    logo = "http://www.pythontest.net/"

    def setUp(self):
        # remove temporary files created by urlretrieve()
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL and yield ``(file_location, info)``.

        All arguments are forwarded unchanged to
        urllib.request.urlretrieve().  The first positional argument is
        additionally handed to support.transient_internet() as the resource
        being accessed.  The downloaded file is unlinked on exit.
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.

        # The helper only unlinks the file once the download has returned;
        # register a cleanup as well so a download that fails part-way does
        # not leave TESTFN behind for later tests.
        self.addCleanup(support.unlink, support.TESTFN)
        with self.urlretrieve(self.logo,
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    def test_data_header(self):
        # Make sure the Date header of the response is present and well-formed.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # Without this check a missing header would reach strptime() as
            # None and surface as a TypeError error instead of a failure.
            self.assertIsNotNone(datevalue, "response has no Date header")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Make sure the reporthook is called with consistent arguments.
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        # Every call must report the same block size as the first one.
        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
226
Brett Cannona71319e2003-05-14 02:18:31 +0000227
# Run the tests defined in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()