blob: 38afb69250c68e8180031ac9a155752891e9967a [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00003
Antoine Pitroua98d26a2011-05-22 17:35:17 +02004import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00005import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00006import urllib.request
Skip Montanaro89feabc2003-03-30 04:54:24 +00007import sys
Brett Cannona71319e2003-05-14 02:18:31 +00008import os
Barry Warsaw820c1202008-06-12 04:06:45 +00009import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000010import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000011
Christian Heimesaf98da12008-01-27 15:18:18 +000012
class URLTimeoutTest(unittest.TestCase):
    """Read a page over HTTP while a default socket timeout is in force.

    setUp installs TIMEOUT as the process-wide default socket timeout and
    tearDown puts back whatever default was active beforehand.
    """
    # XXX this test doesn't seem to test anything useful.

    TIMEOUT = 30.0

    def setUp(self):
        # Remember the current default so tearDown can restore it rather
        # than unconditionally clearing a timeout somebody else configured.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        with support.transient_internet("www.python.org"):
            f = urllib.request.urlopen("http://www.python.org/")
            try:
                # Only the ability to read matters; the payload is discarded.
                f.read()
            finally:
                # Always release the connection, even if read() raises.
                f.close()
Skip Montanaro89feabc2003-03-30 04:54:24 +000028
Antoine Pitroua98d26a2011-05-22 17:35:17 +020029
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL and yield the response object, closing it on exit.

        All arguments are forwarded unchanged to urllib.request.urlopen.
        The first positional argument (the URL) is also the resource name
        passed to support.transient_internet.
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen("http://www.python.org/") as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen("http://www.python.org/") as open_url:
            # The assertion checks for bytes, so the message says bytes too.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen("http://www.python.org/") as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        with self.urlopen(URL) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        with support.transient_internet(URL):
            open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    # On Windows, socket handles are not file descriptors; this
    # test can't pass on Windows.
    @unittest.skipIf(sys.platform in ('win32',), 'not appropriate for Windows')
    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        with self.urlopen("http://www.python.org/", timeout=None) as open_url:
            fd = open_url.fileno()
            # The descriptor still belongs to the response object, which the
            # urlopen() helper closes on exit.  closefd=False keeps this
            # wrapper from closing the same descriptor a second time.
            with os.fdopen(fd, 'rb', closefd=False) as f:
                self.assertTrue(f.read(), "reading from file created using fd "
                                          "returned by fileno failed")

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        # SF patch 809915: In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site.  This
        # test started failing then, which is why a bogus name outside those
        # domains is used.  The URL is derived from bogus_domain so the host
        # probed above is exactly the host opened here.
        self.assertRaises(OSError,
                          urllib.request.urlopen,
                          "http://%s/" % bogus_domain)
Brett Cannona71319e2003-05-14 02:18:31 +0000134
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200135
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL and yield (file_location, info); unlink on exit.

        All arguments are forwarded unchanged to urllib.request.urlretrieve.
        The first positional argument (the URL) is also the resource name
        passed to support.transient_internet.
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve("http://www.python.org/") as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve("http://www.python.org/",
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve("http://www.python.org/") as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    # Binary resource fetched by the header and reporthook tests below.
    logo = "http://www.python.org/static/community_logos/python-logo-master-v3-TM.png"

    def test_data_header(self):
        # Make sure the Date header is present and in the expected format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            # A missing header would otherwise reach strptime() as None and
            # surface as an unrelated TypeError.
            self.assertIsNotNone(datevalue, "response has no Date header")
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                # fail() accepts a single message argument, so the format
                # must be interpolated here rather than passed separately.
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Make sure the reporthook is called with consistent arguments.
        records = []

        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
208
Brett Cannona71319e2003-05-14 02:18:31 +0000209
def test_main():
    """Run every network test case defined in this module.

    Declares the 'network' resource through support.requires first, then
    hands the three TestCase classes to support.run_unittest.
    """
    support.requires('network')
    network_cases = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    support.run_unittest(*network_cases)


if __name__ == "__main__":
    test_main()