blob: 896d7c93e3cc1fd32eeb107c95bb45a2979eb7e4 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Skip Montanaro89feabc2003-03-30 04:54:24 +00002
3import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00004from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00005
Antoine Pitroua98d26a2011-05-22 17:35:17 +02006import contextlib
Skip Montanaro89feabc2003-03-30 04:54:24 +00007import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00008import urllib.request
Skip Montanaro89feabc2003-03-30 04:54:24 +00009import sys
Brett Cannona71319e2003-05-14 02:18:31 +000010import os
Barry Warsaw820c1202008-06-12 04:06:45 +000011import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000012import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000013
Christian Heimesaf98da12008-01-27 15:18:18 +000014
class URLTimeoutTest(unittest.TestCase):
    """Fetch a page while a module-wide default socket timeout is set."""
    # XXX this test doesn't seem to test anything useful.

    # Default socket timeout, in seconds, installed for the duration of
    # each test.
    TIMEOUT = 30.0

    def setUp(self):
        # Remember the default that was active beforehand so tearDown can
        # put it back rather than unconditionally clearing it.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        # Read the whole response body; the only expectation is that no
        # exception is raised while TIMEOUT is the default socket timeout.
        with support.transient_internet("www.python.org"):
            f = urllib.request.urlopen("http://www.python.org/")
            try:
                f.read()
            finally:
                # Always release the connection, even if read() fails.
                f.close()
Skip Montanaro89feabc2003-03-30 04:54:24 +000030
Antoine Pitroua98d26a2011-05-22 17:35:17 +020031
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Context manager yielding the result of urllib.request.urlopen().

        All arguments are forwarded to urllib.request.urlopen(); the first
        positional argument is also passed to support.transient_internet()
        as the resource name.  The response is closed on exit.
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen("http://www.python.org/") as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen("http://www.python.org/") as open_url:
            # The message now names the type that is actually checked.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen("http://www.python.org/") as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        with self.urlopen(URL) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        with support.transient_internet(URL):
            open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    # On Windows, socket handles are not file descriptors; this test can't
    # pass there, so report it as skipped instead of silently passing.
    @unittest.skipIf(sys.platform in ('win32',),
                     'socket handles are not file descriptors on Windows')
    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        with self.urlopen("http://www.python.org/", timeout=None) as open_url:
            fd = open_url.fileno()
            with os.fdopen(fd, encoding='utf-8') as f:
                self.assertTrue(f.read(), "reading from file created using fd "
                                          "returned by fileno failed")

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror alone is too narrow: resolution can also fail
            # with other OSError subclasses (e.g. a timeout), and any such
            # failure means the name did not resolve.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # SF patch 809915:  In Sep 2003, VeriSign started highjacking
            # invalid .com and .net addresses to boost traffic to their own
            # site.  This test started failing then.  One hopes the .invalid
            # domain will be spared to serve its defined purpose.
            # Build the URL from bogus_domain so the host that was probed
            # above is exactly the host that gets opened.
            urllib.request.urlopen("http://%s/" % bogus_domain)
Brett Cannona71319e2003-05-14 02:18:31 +0000133
Antoine Pitroua98d26a2011-05-22 17:35:17 +0200134
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Context manager yielding urllib.request.urlretrieve()'s result.

        All arguments are forwarded to urllib.request.urlretrieve(); the
        first positional argument is also passed to
        support.transient_internet() as the resource name.  Yields the
        (file_location, info) pair and passes file_location to
        support.unlink() on exit.
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve("http://www.python.org/") as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, encoding='utf-8') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve("http://www.python.org/",
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, encoding='utf-8') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve("http://www.python.org/") as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    # Resource downloaded by test_data_header and test_reporthook.
    logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"

    def test_data_header(self):
        # Make sure the Date header of the response is in the expected format.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            # Without this check a missing header would surface as a
            # TypeError from strptime(None, ...) instead of a test failure.
            self.assertIsNotNone(datevalue, "response has no Date header")
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                # fail() takes a single message argument, so the format
                # string must be interpolated here.
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Record every reporthook call and check the values it was given.
        records = []
        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
206 " >= total size in %s" % records_repr)
207
Brett Cannona71319e2003-05-14 02:18:31 +0000208
def test_main():
    """Require the 'network' resource, then run this module's test classes.

    Calls support.requires('network') and hands the three TestCase classes
    defined in this file to support.run_unittest().
    """
    support.requires('network')
    test_classes = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    support.run_unittest(*test_classes)
Skip Montanaro89feabc2003-03-30 04:54:24 +0000214
# Run the tests via test_main() when this file is executed as a script.
if __name__ == "__main__":
    test_main()