blob: 28680aa6b2405d795cecdd3b2bcf6d6c2764d78f [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Serhiy Storchakabfb1cf42020-04-29 10:36:20 +03003from test.support import socket_helper
Skip Montanaro89feabc2003-03-30 04:54:24 +00004
Antoine Pitroua98d26a2011-05-22 17:35:17 +02005import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00006import socket
Stéphane Wirtela40681d2019-02-22 14:45:36 +01007import urllib.parse
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
Brett Cannona71319e2003-05-14 02:18:31 +00009import os
Barry Warsaw820c1202008-06-12 04:06:45 +000010import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000011import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000012
Christian Heimesaf98da12008-01-27 15:18:18 +000013
# Every test in this module talks to real hosts, so declare the 'network'
# test resource up front, before any test class is defined.
support.requires("network")
15
Senthil Kumaran1bd7d292017-05-15 23:08:07 -070016
class URLTimeoutTest(unittest.TestCase):
    """Read a URL while a process-wide default socket timeout is installed.

    XXX this test doesn't seem to test anything useful.
    """

    def setUp(self):
        """Install support.INTERNET_TIMEOUT as the default socket timeout."""
        # Remember the value that was active so tearDown() can put it back
        # instead of unconditionally resetting the global default to None.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(support.INTERNET_TIMEOUT)

    def tearDown(self):
        """Restore the default socket timeout saved by setUp()."""
        # Fall back to None (the historical behaviour of this method) if
        # tearDown() is ever invoked without a preceding setUp().
        socket.setdefaulttimeout(getattr(self, "_saved_timeout", None))

    def testURLread(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        domain = urllib.parse.urlparse(support.TEST_HTTP_URL).netloc
        with socket_helper.transient_internet(domain):
            # Use the response as a context manager so it is closed even
            # when read() raises; previously it was never closed.
            with urllib.request.urlopen(support.TEST_HTTP_URL) as f:
                f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000034
Antoine Pitroua98d26a2011-05-22 17:35:17 +020035
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  They assume that the tests using files
    do a good job overall of covering the basic interface features.  There
    are no tests exercising the optional 'data' and 'proxies' arguments.  No
    tests for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.pythontest.net/ since there are a few tests that don't use
    that address and making a connection is expensive enough to warrant
    minimizing unneeded connections.

    """

    url = 'http://www.pythontest.net/'

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Yield urllib.request.urlopen(*args, **kwargs), closing it on exit.

        The first positional argument is also handed to
        socket_helper.transient_internet() as the resource being accessed,
        so it must be supplied positionally.
        """
        resource = args[0]
        with socket_helper.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen(self.url) as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen(self.url) as open_url:
            # The assertion checks for bytes, so the message says bytes too
            # (it used to claim a "string" was expected).
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen(self.url) as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        with self.urlopen(self.url) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, self.url)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = self.url + "XXXinvalidXXX"
        with socket_helper.transient_internet(URL):
            # Only the open() call is expected to emit the deprecation
            # warning, so keep the assertWarns block as small as possible.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.

        # Given that both VeriSign and various ISPs have in
        # the past or are presently hijacking various invalid
        # domain name requests in an attempt to boost traffic
        # to their own sites, finding a domain name to use
        # for this test is difficult.  RFC2606 leads one to
        # believe that '.invalid' should work, but experience
        # seemed to indicate otherwise.  Single character
        # TLDs are likely to remain invalid, so this seems to
        # be the best choice.  The trailing '.' prevents a
        # related problem: The normal DNS resolver appends
        # the domain names from the search path if there is
        # no '.' at the end, and if one of those domains
        # implements a '*' rule a result is returned.
        # However, none of this will prevent the test from
        # failing if the ISP hijacks all invalid domain
        # requests.  The real solution would be to be able to
        # parameterize the framework with a mock resolver.
        bogus_domain = "sadflkjsasf.i.nvali.d."
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # If the open unexpectedly succeeds, close the response right
            # away so the failing path does not also leak a socket.
            urllib.request.urlopen("http://{}/".format(bogus_domain)).close()
Brett Cannona71319e2003-05-14 02:18:31 +0000148
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200149
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    # Resource downloaded by every test in this class.  Declared at the top
    # of the class (it used to sit between two test methods) so it is
    # visible before the methods that read it.
    logo = "http://www.pythontest.net/"

    def setUp(self):
        # remove temporary files created by urlretrieve()
        self.addCleanup(urllib.request.urlcleanup)

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Yield urllib.request.urlretrieve(*args, **kwargs), then clean up.

        The yielded value is the (file_location, info) pair returned by
        urlretrieve().  The first positional argument is also handed to
        socket_helper.transient_internet() as the resource being accessed.
        The downloaded file is removed with support.unlink() on exit.
        """
        resource = args[0]
        with socket_helper.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve(self.logo,
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    def test_data_header(self):
        # The Date header of the response must be in HTTP-date format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # A missing header comes back as None; report that as a test
            # failure instead of letting strptime() raise TypeError.
            self.assertIsNotNone(datevalue, "response has no Date header")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Check the sequence of (blocks, block_size, total_size) triples
        # that urlretrieve() passes to its reporthook.
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        # First call: zero blocks transferred so far, with a usable block
        # size and the advertised total size.
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        # Every call must report the same block size as the first one.
        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
227
Brett Cannona71319e2003-05-14 02:18:31 +0000228
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()