blob: 4103b6c07505d53bd88db487508dba9f207d75a7 [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00003
Antoine Pitroua98d26a2011-05-22 17:35:17 +02004import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00005import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import urllib.request
Brett Cannona71319e2003-05-14 02:18:31 +00007import os
Barry Warsaw820c1202008-06-12 04:06:45 +00008import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +00009import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000010
Christian Heimesaf98da12008-01-27 15:18:18 +000011
# Every test in this module talks to real hosts, so the whole module is gated
# on the 'network' resource being enabled for the test run.
support.requires('network')
13
Senthil Kumaran1bd7d292017-05-15 23:08:07 -070014
class URLTimeoutTest(unittest.TestCase):
    """Read a URL while a process-wide default socket timeout is in effect."""
    # XXX this test doesn't seem to test anything useful.

    TIMEOUT = 30.0

    def setUp(self):
        # Remember whatever default timeout was active so tearDown can put it
        # back, rather than clobbering global state with None.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        with support.transient_internet("www.example.com"):
            # Use the response as a context manager so the underlying socket
            # is closed deterministically instead of leaking until GC.
            with urllib.request.urlopen("http://www.example.com/") as f:
                f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000030
Antoine Pitroua98d26a2011-05-22 17:35:17 +020031
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.pythontest.net/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    url = 'http://www.pythontest.net/'

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL under transient_internet() and close it on exit.

        All arguments are forwarded to urllib.request.urlopen(); the first
        positional argument is also used as the resource name passed to
        support.transient_internet().
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen(self.url) as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen(self.url) as open_url:
            # The response is a binary stream, so readline() yields bytes.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen(self.url) as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        with self.urlopen(self.url) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, self.url)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = self.url + "XXXinvalidXXX"
        with support.transient_internet(URL):
            # Only the open() call is expected to emit the DeprecationWarning.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.

        # Given that both VeriSign and various ISPs have in
        # the past or are presently hijacking various invalid
        # domain name requests in an attempt to boost traffic
        # to their own sites, finding a domain name to use
        # for this test is difficult.  RFC2606 leads one to
        # believe that '.invalid' should work, but experience
        # seemed to indicate otherwise.  Single character
        # TLDs are likely to remain invalid, so this seems to
        # be the best choice.  The trailing '.' prevents a
        # related problem: The normal DNS resolver appends
        # the domain names from the search path if there is
        # no '.' at the end, and if one of those domains
        # implements a '*' rule a result is returned.
        # However, none of this will prevent the test from
        # failing if the ISP hijacks all invalid domain
        # requests.  The real solution would be to be able to
        # parameterize the framework with a mock resolver.
        bogus_domain = "sadflkjsasf.i.nvali.d."
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            urllib.request.urlopen("http://{}/".format(bogus_domain))
Brett Cannona71319e2003-05-14 02:18:31 +0000140
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200141
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    # URL fetched by every test in this class.
    logo = "http://www.pythontest.net/"

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL under transient_internet() and delete the file on exit.

        All arguments are forwarded to urllib.request.urlretrieve(); the first
        positional argument is also used as the resource name passed to
        support.transient_internet().  Yields the (file_location, info) pair
        returned by urlretrieve().
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve(self.logo,
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    def test_data_header(self):
        # The Date header must be present and in the expected GMT format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # A missing header would make strptime() raise TypeError, which
            # the ValueError handler below would not turn into a failure.
            self.assertIsNotNone(datevalue, "no Date header in response")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Check the (blocks, block_size, total_size) tuples passed to the hook.
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            content_length = fileheaders['Content-Length']
            # Fail clearly instead of letting int(None) raise TypeError.
            self.assertIsNotNone(content_length,
                                 "no Content-Length header in response")
            expected_size = int(content_length)

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
215
Brett Cannona71319e2003-05-14 02:18:31 +0000216
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()