Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 1 | import unittest |
Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 2 | from test import support |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 3 | |
Antoine Pitrou | a98d26a | 2011-05-22 17:35:17 +0200 | [diff] [blame] | 4 | import contextlib |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 5 | import socket |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 6 | import urllib.request |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 7 | import sys |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 8 | import os |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 9 | import email.message |
Senthil Kumaran | f6c456d | 2010-05-01 08:29:18 +0000 | [diff] [blame] | 10 | import time |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 11 | |
Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 12 | |
# Every test below contacts real hosts, so gate the whole module on the
# 'network' test resource (presumably this makes the test runner skip the
# module when that resource has not been enabled -- confirm against
# test.support.requires).
support.requires('network')
| 14 | |
class URLTimeoutTest(unittest.TestCase):
    """Read a page over HTTP while a global default socket timeout is set."""
    # XXX this test doesn't seem to test anything useful.

    # Default socket timeout, in seconds, installed for the duration of
    # each test.
    TIMEOUT = 30.0

    def setUp(self):
        # Remember the timeout that was in effect so tearDown can put it
        # back, rather than clobbering a value set by the test runner.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        # Fetch and fully read the page; the test passes if nothing raises.
        with support.transient_internet("www.example.com"):
            # Use the response as a context manager so the underlying
            # socket is closed even if read() fails.
            with urllib.request.urlopen("http://www.example.com/") as f:
                f.read()
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 30 | |
Antoine Pitrou | a98d26a | 2011-05-22 17:35:17 +0200 | [diff] [blame] | 31 | |
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.example.com/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    @contextlib.contextmanager
    def urlopen(self, *args, **kwargs):
        """Open a URL and yield the response, closing it on exit.

        All arguments are forwarded unchanged to urllib.request.urlopen().
        The first positional argument is additionally passed to
        support.transient_internet() as the resource name.
        """
        resource = args[0]
        with support.transient_internet(resource):
            r = urllib.request.urlopen(*args, **kwargs)
            try:
                yield r
            finally:
                r.close()

    def test_basic(self):
        # Simple test expected to pass.
        with self.urlopen("http://www.example.com/") as open_url:
            for attr in ("read", "readline", "readlines", "fileno", "close",
                         "info", "geturl"):
                self.assertTrue(hasattr(open_url, attr), "object returned from "
                                "urlopen lacks the %s attribute" % attr)
            self.assertTrue(open_url.read(), "calling 'read' failed")

    def test_readlines(self):
        # Test both readline and readlines.
        with self.urlopen("http://www.example.com/") as open_url:
            # The response is a binary stream, so a line is bytes, not str.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")

    def test_info(self):
        # Test 'info'.
        with self.urlopen("http://www.example.com/") as open_url:
            info_obj = open_url.info()
            self.assertIsInstance(info_obj, email.message.Message,
                                  "object returned by 'info' is not an "
                                  "instance of email.message.Message")
            self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.example.com/"
        with self.urlopen(URL) as open_url:
            gotten_url = open_url.geturl()
            self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.example.com/XXXinvalidXXX"
        with support.transient_internet(URL):
            # Instantiating FancyURLopener is expected to emit a
            # DeprecationWarning; assert it so the warning is not reported
            # as noise.
            with self.assertWarns(DeprecationWarning):
                open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
            self.assertEqual(code, 404)

    # On Windows, socket handles are not file descriptors; this
    # test can't pass on Windows.
    @unittest.skipIf(sys.platform in ('win32',), 'not appropriate for Windows')
    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        # NOTE(review): timeout=None presumably keeps the socket blocking so
        # the raw descriptor can be read through os.fdopen() -- confirm.
        with self.urlopen("http://www.google.com/", timeout=None) as open_url:
            fd = open_url.fileno()
            with os.fdopen(fd, 'rb') as f:
                self.assertTrue(f.read(), "reading from file created using fd "
                                          "returned by fileno failed")

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except OSError:
            # socket.gaierror is too narrow, since getaddrinfo() may also
            # fail with EAI_SYSTEM and ETIMEDOUT (seen on Ubuntu 13.04),
            # i.e. Python's TimeoutError.
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        failure_explanation = ('opening an invalid URL did not raise OSError; '
                               'can be caused by a broken DNS server '
                               '(e.g. returns 404 or hijacks page)')
        with self.assertRaises(OSError, msg=failure_explanation):
            # SF patch 809915:  In Sep 2003, VeriSign started highjacking
            # invalid .com and .net addresses to boost traffic to their own
            # site.  This test started failing then.  One hopes the .invalid
            # domain will be spared to serve its defined purpose.
            #
            # Build the URL from bogus_domain so the host opened here is
            # always the one verified as unresolvable above.
            urllib.request.urlopen("http://%s/" % bogus_domain)
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 136 | |
Antoine Pitrou | a98d26a | 2011-05-22 17:35:17 +0200 | [diff] [blame] | 137 | |
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    @contextlib.contextmanager
    def urlretrieve(self, *args, **kwargs):
        """Retrieve a URL and yield (file_location, info); unlink on exit.

        All arguments are forwarded unchanged to
        urllib.request.urlretrieve().  The first positional argument is
        additionally passed to support.transient_internet() as the resource
        name.  The downloaded file is removed with support.unlink() when the
        with-block finishes, whether or not it raised.
        """
        resource = args[0]
        with support.transient_internet(resource):
            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
            try:
                yield file_location, info
            finally:
                support.unlink(file_location)

    def test_basic(self):
        # Test basic functionality.
        with self.urlretrieve("http://www.example.com/") as (file_location, info):
            self.assertTrue(os.path.exists(file_location), "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from the file location returned"
                                " by urlretrieve failed")

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        with self.urlretrieve("http://www.example.com/",
                              support.TESTFN) as (file_location, info):
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, 'rb') as f:
                self.assertTrue(f.read(), "reading from temporary file failed")

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        with self.urlretrieve("http://www.example.com/") as (file_location, info):
            self.assertIsInstance(info, email.message.Message,
                                  "info is not an instance of email.message.Message")

    # URL fetched by the header and reporthook tests below.
    logo = "http://www.example.com/"

    def test_data_header(self):
        # The Date header of the response must parse as an HTTP date.
        with self.urlretrieve(self.logo) as (file_location, fileheaders):
            datevalue = fileheaders.get('Date')
            # Without a header, strptime() would raise TypeError instead of
            # producing a readable test failure.
            self.assertIsNotNone(datevalue, "response has no Date header")
            dateformat = '%a, %d %b %Y %H:%M:%S GMT'
            try:
                time.strptime(datevalue, dateformat)
            except ValueError:
                # fail() takes a single message argument, so the format
                # must be interpolated before the call.
                self.fail('Date value not in %r format' % dateformat)

    def test_reporthook(self):
        # Check the (blocks, block_size, total_size) tuples passed to the
        # reporthook callback during a download.
        records = []
        def recording_reporthook(blocks, block_size, total_size):
            records.append((blocks, block_size, total_size))

        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
                file_location, fileheaders):
            expected_size = int(fileheaders['Content-Length'])

        records_repr = repr(records)  # For use in error messages.
        self.assertGreater(len(records), 1, msg="There should always be two "
                           "calls; the first one before the transfer starts.")
        # First call: zero blocks transferred so far, sizes already known.
        self.assertEqual(records[0][0], 0)
        self.assertGreater(records[0][1], 0,
                           msg="block size can't be 0 in %s" % records_repr)
        self.assertEqual(records[0][2], expected_size)
        self.assertEqual(records[-1][2], expected_size)

        # Every call must report the same block size as the first one.
        block_sizes = {block_size for _, block_size, _ in records}
        self.assertEqual({records[0][1]}, block_sizes,
                         msg="block sizes in %s must be equal" % records_repr)
        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
                                msg="number of blocks * block size must be"
                                " >= total size in %s" % records_repr)
| 210 | |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 211 | |
# Allow running this file directly as a script: hand control to unittest's
# command-line entry point only when the module is executed, not imported.
if __name__ == "__main__":
    unittest.main()