blob: 949716c2b567e0008ba25266aea0060b1a2dc3fb [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00003
Antoine Pitroua98d26a2011-05-22 17:35:17 +02004import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00005import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import urllib.request
Brett Cannona71319e2003-05-14 02:18:31 +00007import os
Barry Warsaw820c1202008-06-12 04:06:45 +00008import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +00009import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000010
Christian Heimesaf98da12008-01-27 15:18:18 +000011
Senthil Kumarancfdd0162014-04-14 21:31:41 -040012support.requires('network')
13
class URLTimeoutTest(unittest.TestCase):
    # Smoke test: fetch a page while a process-wide socket timeout is set.
    # XXX this test doesn't seem to test anything useful.

    TIMEOUT = 30.0

    def setUp(self):
        # Remember the current process-wide default so tearDown can put it
        # back instead of unconditionally resetting it to None.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        with support.transient_internet("www.example.com"):
            # Use the response as a context manager so the underlying
            # connection is closed even if read() raises.
            with urllib.request.urlopen("http://www.example.com/") as f:
                f.read()
Skip Montanaro89feabc2003-03-30 04:54:24 +000029
Antoine Pitroua98d26a2011-05-22 17:35:17 +020030
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.pythontest.net/ since there are a few tests that don't use
    that address and making a connection is expensive enough to warrant
    minimizing unneeded connections.

    """

    url = 'http://www.pythontest.net/'

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL and yield the response, closing it on exit.

        The first positional argument (the URL) is also passed to
        support.transient_internet() as the resource name.  All arguments
        are forwarded unchanged to urllib.request.urlopen().
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen(self.url) as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen(self.url) as open_url:
            # The response is a binary stream, so a line is a bytes object.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen(self.url) as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        with self.urlopen(self.url) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, self.url)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = self.url + "XXXinvalidXXX"
        with support.transient_internet(URL):
            # Instantiating FancyURLopener is expected to emit a
            # DeprecationWarning; assert it so it does not leak into output.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.

        # Given that both VeriSign and various ISPs have in
        # the past or are presently hijacking various invalid
        # domain name requests in an attempt to boost traffic
        # to their own sites, finding a domain name to use
        # for this test is difficult.  RFC2606 leads one to
        # believe that '.invalid' should work, but experience
        # seemed to indicate otherwise.  Single character
        # TLDs are likely to remain invalid, so this seems to
        # be the best choice.  The trailing '.' prevents a
        # related problem: The normal DNS resolver appends
        # the domain names from the search path if there is
        # no '.' at the end, and if one of those domains
        # implements a '*' rule a result is returned.
        # However, none of this will prevent the test from
        # failing if the ISP hijacks all invalid domain
        # requests.  The real solution would be to be able to
        # parameterize the framework with a mock resolver.
        bogus_domain = "sadflkjsasf.i.nvali.d."
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # If the open unexpectedly succeeds, close the response so the
            # failing run does not additionally leak a connection.
            urllib.request.urlopen("http://{}/".format(bogus_domain)).close()
Brett Cannona71319e2003-05-14 02:18:31 +0000139
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200140
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    # URL retrieved by every test in this class.
    logo = "http://www.pythontest.net/"

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL and yield ``(file_location, info)``.

        The first positional argument (the URL) is also passed to
        support.transient_internet() as the resource name.  All arguments
        are forwarded unchanged to urllib.request.urlretrieve().  The
        downloaded file is removed with support.unlink() on exit.
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve(self.logo,
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve(self.logo) as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    def test_data_header(self):
        # The Date header must be present and in the expected GMT format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # A missing header would make strptime() raise TypeError, which
            # is reported as an error rather than a failure; check it first.
            self.assertIsNotNone(datevalue, "response has no Date header")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Each record is one reporthook call: (blocks, block_size, total_size).
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            # Without this header int() below would raise TypeError on None.
            self.assertIn('Content-Length', fileheaders,
                          "response has no Content-Length header")
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
213
Brett Cannona71319e2003-05-14 02:18:31 +0000214
if __name__ == "__main__":
    # Executed directly as a script: run the test cases defined in this module.
    unittest.main()