blob: b9b926265c839d6968d3b7f0a911c15e6c5da8ac [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00003
Antoine Pitroua98d26a2011-05-22 17:35:17 +02004import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00005import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import urllib.request
Skip Montanaro89feabc2003-03-30 04:54:24 +00007import sys
Brett Cannona71319e2003-05-14 02:18:31 +00008import os
Barry Warsaw820c1202008-06-12 04:06:45 +00009import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000010import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000011
Christian Heimesaf98da12008-01-27 15:18:18 +000012
# Declare that every test in this module needs the 'network' test resource.
# NOTE(review): presumably test.support refuses to run the module when that
# resource has not been enabled -- confirm against test.support.requires().
Senthil Kumarancfdd0162014-04-14 21:31:41 -040013support.requires('network')
14
Skip Montanaro89feabc2003-03-30 04:54:24 +000015class URLTimeoutTest(unittest.TestCase):
Antoine Pitroud9faa202011-03-26 18:38:06 +010016 # XXX this test doesn't seem to test anything useful.
Skip Montanaro89feabc2003-03-30 04:54:24 +000017
Senthil Kumaranbd8f1452010-12-15 04:02:45 +000018 TIMEOUT = 30.0
Skip Montanaro89feabc2003-03-30 04:54:24 +000019
20 def setUp(self):
21 socket.setdefaulttimeout(self.TIMEOUT)
22
23 def tearDown(self):
24 socket.setdefaulttimeout(None)
25
26 def testURLread(self):
Ned Deily5a507f02014-03-26 23:31:39 -070027 with support.transient_internet("www.example.com"):
28 f = urllib.request.urlopen("http://www.example.com/")
Antoine Pitroud9faa202011-03-26 18:38:06 +010029 x = f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000030
Antoine Pitroua98d26a2011-05-22 17:35:17 +020031
Brett Cannona71319e2003-05-14 02:18:31 +000032class urlopenNetworkTests(unittest.TestCase):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000033 """Tests urllib.reqest.urlopen using the network.
Tim Peters813cec92003-05-16 15:35:10 +000034
Brett Cannona71319e2003-05-14 02:18:31 +000035 These tests are not exhaustive. Assuming that testing using files does a
36 good job overall of some of the basic interface features. There are no
37 tests exercising the optional 'data' and 'proxies' arguments. No tests
38 for transparent redirection have been written.
Tim Peters813cec92003-05-16 15:35:10 +000039
Brett Cannona71319e2003-05-14 02:18:31 +000040 setUp is not used for always constructing a connection to
Ned Deily5a507f02014-03-26 23:31:39 -070041 http://www.example.com/ since there a few tests that don't use that address
Brett Cannona71319e2003-05-14 02:18:31 +000042 and making a connection is expensive enough to warrant minimizing unneeded
43 connections.
Tim Peters813cec92003-05-16 15:35:10 +000044
Brett Cannona71319e2003-05-14 02:18:31 +000045 """
46
Antoine Pitroua98d26a2011-05-22 17:35:17 +020047 @contextlib.contextmanager
Senthil Kumaranee2538b2010-10-17 10:52:12 +000048 def urlopen(self, *args, **kwargs):
49 resource = args[0]
Antoine Pitroua98d26a2011-05-22 17:35:17 +020050 with support.transient_internet(resource):
51 r = urllib.request.urlopen(*args, **kwargs)
52 try:
53 yield r
54 finally:
55 r.close()
Christian Heimesaf98da12008-01-27 15:18:18 +000056
Brett Cannona71319e2003-05-14 02:18:31 +000057 def test_basic(self):
58 # Simple test expected to pass.
Ned Deily5a507f02014-03-26 23:31:39 -070059 with self.urlopen("http://www.example.com/") as open_url:
Antoine Pitroua98d26a2011-05-22 17:35:17 +020060 for attr in ("read", "readline", "readlines", "fileno", "close",
61 "info", "geturl"):
62 self.assertTrue(hasattr(open_url, attr), "object returned from "
63 "urlopen lacks the %s attribute" % attr)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +000064 self.assertTrue(open_url.read(), "calling 'read' failed")
Brett Cannona71319e2003-05-14 02:18:31 +000065
66 def test_readlines(self):
67 # Test both readline and readlines.
Ned Deily5a507f02014-03-26 23:31:39 -070068 with self.urlopen("http://www.example.com/") as open_url:
Ezio Melottie9615932010-01-24 19:26:24 +000069 self.assertIsInstance(open_url.readline(), bytes,
70 "readline did not return a string")
71 self.assertIsInstance(open_url.readlines(), list,
72 "readlines did not return a list")
Brett Cannona71319e2003-05-14 02:18:31 +000073
74 def test_info(self):
75 # Test 'info'.
Ned Deily5a507f02014-03-26 23:31:39 -070076 with self.urlopen("http://www.example.com/") as open_url:
Brett Cannona71319e2003-05-14 02:18:31 +000077 info_obj = open_url.info()
Ezio Melottie9615932010-01-24 19:26:24 +000078 self.assertIsInstance(info_obj, email.message.Message,
79 "object returned by 'info' is not an "
80 "instance of email.message.Message")
Barry Warsaw820c1202008-06-12 04:06:45 +000081 self.assertEqual(info_obj.get_content_subtype(), "html")
Brett Cannona71319e2003-05-14 02:18:31 +000082
83 def test_geturl(self):
84 # Make sure same URL as opened is returned by geturl.
Ned Deily5a507f02014-03-26 23:31:39 -070085 URL = "http://www.example.com/"
Antoine Pitroua98d26a2011-05-22 17:35:17 +020086 with self.urlopen(URL) as open_url:
Brett Cannona71319e2003-05-14 02:18:31 +000087 gotten_url = open_url.geturl()
Antoine Pitroua98d26a2011-05-22 17:35:17 +020088 self.assertEqual(gotten_url, URL)
Brett Cannona71319e2003-05-14 02:18:31 +000089
Christian Heimes9bd667a2008-01-20 15:14:11 +000090 def test_getcode(self):
91 # test getcode() with the fancy opener to get 404 error codes
Ned Deily5a507f02014-03-26 23:31:39 -070092 URL = "http://www.example.com/XXXinvalidXXX"
Antoine Pitroua98d26a2011-05-22 17:35:17 +020093 with support.transient_internet(URL):
R David Murray130a5662014-06-11 17:09:43 -040094 with self.assertWarns(DeprecationWarning):
95 open_url = urllib.request.FancyURLopener().open(URL)
Antoine Pitroua98d26a2011-05-22 17:35:17 +020096 try:
97 code = open_url.getcode()
98 finally:
99 open_url.close()
100 self.assertEqual(code, 404)
Christian Heimes9bd667a2008-01-20 15:14:11 +0000101
Zachary Ware9fe6d862013-12-08 00:20:35 -0600102 # On Windows, socket handles are not file descriptors; this
103 # test can't pass on Windows.
104 @unittest.skipIf(sys.platform in ('win32',), 'not appropriate for Windows')
Brett Cannona71319e2003-05-14 02:18:31 +0000105 def test_fileno(self):
106 # Make sure fd returned by fileno is valid.
Ned Deily223082f2014-03-27 01:39:28 -0700107 with self.urlopen("http://www.google.com/", timeout=None) as open_url:
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200108 fd = open_url.fileno()
Benjamin Petersona96ed632014-02-19 23:06:41 -0500109 with os.fdopen(fd, 'rb') as f:
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200110 self.assertTrue(f.read(), "reading from file created using fd "
111 "returned by fileno failed")
Brett Cannona71319e2003-05-14 02:18:31 +0000112
113 def test_bad_address(self):
114 # Make sure proper exception is raised when connecting to a bogus
115 # address.
Martin Pantera7f99332015-12-16 04:36:20 +0000116
117 # Given that both VeriSign and various ISPs have in
118 # the past or are presently hijacking various invalid
119 # domain name requests in an attempt to boost traffic
120 # to their own sites, finding a domain name to use
121 # for this test is difficult. RFC2606 leads one to
122 # believe that '.invalid' should work, but experience
123 # seemed to indicate otherwise. Single character
124 # TLDs are likely to remain invalid, so this seems to
125 # be the best choice. The trailing '.' prevents a
126 # related problem: The normal DNS resolver appends
127 # the domain names from the search path if there is
128 # no '.' the end and, and if one of those domains
129 # implements a '*' rule a result is returned.
130 # However, none of this will prevent the test from
131 # failing if the ISP hijacks all invalid domain
132 # requests. The real solution would be to be able to
133 # parameterize the framework with a mock resolver.
134 bogus_domain = "sadflkjsasf.i.nvali.d."
Antoine Pitrou72fff042011-07-08 19:19:57 +0200135 try:
136 socket.gethostbyname(bogus_domain)
Antoine Pitrou6b5a38c2013-05-25 13:08:13 +0200137 except OSError:
138 # socket.gaierror is too narrow, since getaddrinfo() may also
139 # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
140 # i.e. Python's TimeoutError.
Antoine Pitrou72fff042011-07-08 19:19:57 +0200141 pass
142 else:
143 # This happens with some overzealous DNS providers such as OpenDNS
144 self.skipTest("%r should not resolve for test to work" % bogus_domain)
Brett Cannonb463c482013-01-11 11:17:53 -0500145 failure_explanation = ('opening an invalid URL did not raise OSError; '
146 'can be caused by a broken DNS server '
147 '(e.g. returns 404 or hijacks page)')
148 with self.assertRaises(OSError, msg=failure_explanation):
Martin Pantera7f99332015-12-16 04:36:20 +0000149 urllib.request.urlopen("http://{}/".format(bogus_domain))
Brett Cannona71319e2003-05-14 02:18:31 +0000150
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200151
Brett Cannona71319e2003-05-14 02:18:31 +0000152class urlretrieveNetworkTests(unittest.TestCase):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000153 """Tests urllib.request.urlretrieve using the network."""
Brett Cannona71319e2003-05-14 02:18:31 +0000154
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200155 @contextlib.contextmanager
Gregory P. Smith6b0bdab2012-11-10 13:43:44 -0800156 def urlretrieve(self, *args, **kwargs):
Senthil Kumaranee2538b2010-10-17 10:52:12 +0000157 resource = args[0]
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200158 with support.transient_internet(resource):
Gregory P. Smith6b0bdab2012-11-10 13:43:44 -0800159 file_location, info = urllib.request.urlretrieve(*args, **kwargs)
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200160 try:
161 yield file_location, info
162 finally:
163 support.unlink(file_location)
Christian Heimesaf98da12008-01-27 15:18:18 +0000164
Brett Cannona71319e2003-05-14 02:18:31 +0000165 def test_basic(self):
166 # Test basic functionality.
Ned Deily5a507f02014-03-26 23:31:39 -0700167 with self.urlretrieve("http://www.example.com/") as (file_location, info):
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200168 self.assertTrue(os.path.exists(file_location), "file location returned by"
169 " urlretrieve is not a valid path")
Benjamin Petersona96ed632014-02-19 23:06:41 -0500170 with open(file_location, 'rb') as f:
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200171 self.assertTrue(f.read(), "reading from the file location returned"
172 " by urlretrieve failed")
Brett Cannona71319e2003-05-14 02:18:31 +0000173
174 def test_specified_path(self):
175 # Make sure that specifying the location of the file to write to works.
Ned Deily5a507f02014-03-26 23:31:39 -0700176 with self.urlretrieve("http://www.example.com/",
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200177 support.TESTFN) as (file_location, info):
178 self.assertEqual(file_location, support.TESTFN)
179 self.assertTrue(os.path.exists(file_location))
Benjamin Petersona96ed632014-02-19 23:06:41 -0500180 with open(file_location, 'rb') as f:
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200181 self.assertTrue(f.read(), "reading from temporary file failed")
Brett Cannona71319e2003-05-14 02:18:31 +0000182
183 def test_header(self):
184 # Make sure header returned as 2nd value from urlretrieve is good.
Ned Deily5a507f02014-03-26 23:31:39 -0700185 with self.urlretrieve("http://www.example.com/") as (file_location, info):
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200186 self.assertIsInstance(info, email.message.Message,
187 "info is not an instance of email.message.Message")
Tim Peters813cec92003-05-16 15:35:10 +0000188
Ned Deily5a507f02014-03-26 23:31:39 -0700189 logo = "http://www.example.com/"
Gregory P. Smith6b0bdab2012-11-10 13:43:44 -0800190
Senthil Kumaranf6c456d2010-05-01 08:29:18 +0000191 def test_data_header(self):
Gregory P. Smith6b0bdab2012-11-10 13:43:44 -0800192 with self.urlretrieve(self.logo) as (file_location, fileheaders):
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200193 datevalue = fileheaders.get('Date')
194 dateformat = '%a, %d %b %Y %H:%M:%S GMT'
195 try:
196 time.strptime(datevalue, dateformat)
197 except ValueError:
198 self.fail('Date value not in %r format', dateformat)
Brett Cannona71319e2003-05-14 02:18:31 +0000199
Gregory P. Smith6b0bdab2012-11-10 13:43:44 -0800200 def test_reporthook(self):
201 records = []
202 def recording_reporthook(blocks, block_size, total_size):
203 records.append((blocks, block_size, total_size))
204
205 with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
206 file_location, fileheaders):
207 expected_size = int(fileheaders['Content-Length'])
208
209 records_repr = repr(records) # For use in error messages.
210 self.assertGreater(len(records), 1, msg="There should always be two "
211 "calls; the first one before the transfer starts.")
212 self.assertEqual(records[0][0], 0)
213 self.assertGreater(records[0][1], 0,
214 msg="block size can't be 0 in %s" % records_repr)
215 self.assertEqual(records[0][2], expected_size)
216 self.assertEqual(records[-1][2], expected_size)
217
218 block_sizes = {block_size for _, block_size, _ in records}
219 self.assertEqual({records[0][1]}, block_sizes,
220 msg="block sizes in %s must be equal" % records_repr)
221 self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
222 msg="number of blocks * block size must be"
223 " >= total size in %s" % records_repr)
224
Brett Cannona71319e2003-05-14 02:18:31 +0000225
# Run every test in this module when the file is executed as a script.
Skip Montanaro89feabc2003-03-30 04:54:24 +0000226if __name__ == "__main__":
Senthil Kumarancfdd0162014-04-14 21:31:41 -0400227 unittest.main()