Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 1 | import unittest |
| 2 | from test import test_support |
| 3 | |
| 4 | import socket |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 5 | import urllib |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 6 | import sys |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 7 | import os |
Senthil Kumaran | 1b7f9e5 | 2010-05-01 08:01:56 +0000 | [diff] [blame] | 8 | import time |
| 9 | |
Benjamin Peterson | b206473 | 2014-11-23 20:55:24 -0600 | [diff] [blame] | 10 | try: |
| 11 | import ssl |
| 12 | except ImportError: |
| 13 | ssl = None |
| 14 | |
# Directory that holds this test module; the certificate below is looked up
# relative to it.
here = os.path.dirname(__file__)
# Self-signed cert file for self-signed.pythontest.net
CERT_selfsigned_pythontestdotnet = os.path.join(
    here, 'selfsigned_pythontestdotnet.pem')
| 18 | |
Ezio Melotti | a2d4653 | 2010-01-30 07:22:54 +0000 | [diff] [blame] | 19 | mimetools = test_support.import_module("mimetools", deprecated=True) |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 20 | |
Neal Norwitz | 5be3067 | 2008-01-26 05:54:48 +0000 | [diff] [blame] | 21 | |
def _open_with_retry(func, host, *args, **kwargs):
    """Call ``func(host, *args, **kwargs)``, retrying on IOError.

    Connecting to remote hosts is flaky, so the call is attempted up to
    three times.  The first successful result is returned immediately.

    Any exception other than IOError propagates on the first attempt
    without a retry.  If all three attempts raise IOError, the error from
    the final attempt is re-raised.
    """
    last_exc = None
    for _ in range(3):
        try:
            return func(host, *args, **kwargs)
        except IOError as exc:
            # Keep the error in a separate name: the ``as`` target is not
            # guaranteed to outlive the handler on every Python version.
            last_exc = exc
    raise last_exc
| 33 | |
| 34 | |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 35 | class URLTimeoutTest(unittest.TestCase): |
| 36 | |
| 37 | TIMEOUT = 10.0 |
| 38 | |
| 39 | def setUp(self): |
| 40 | socket.setdefaulttimeout(self.TIMEOUT) |
| 41 | |
| 42 | def tearDown(self): |
| 43 | socket.setdefaulttimeout(None) |
| 44 | |
| 45 | def testURLread(self): |
Ned Deily | c727533 | 2014-03-26 23:25:02 -0700 | [diff] [blame] | 46 | f = _open_with_retry(urllib.urlopen, "http://www.example.com/") |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 47 | x = f.read() |
| 48 | |
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.example.com/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def urlopen(self, *args):
        # Go through the retry helper so a transient IOError while
        # connecting does not fail the test outright.
        return _open_with_retry(urllib.urlopen, *args)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.example.com/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.example.com/")
        try:
            self.assertIsInstance(open_url.readline(), basestring,
                                  "readline did not return a string")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.example.com/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        self.assertIsInstance(info_obj, mimetools.Message,
                              "object returned by 'info' is not an "
                              "instance of mimetools.Message")
        self.assertEqual(info_obj.getsubtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.example.com/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.example.com/XXXinvalidXXX"
        open_url = urllib.FancyURLopener().open(URL)
        try:
            code = open_url.getcode()
        finally:
            open_url.close()
        self.assertEqual(code, 404)

    @unittest.skipIf(sys.platform in ('win32',), 'not appropriate for Windows')
    @unittest.skipUnless(hasattr(os, 'fdopen'), 'os.fdopen not available')
    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        open_url = self.urlopen("http://www.example.com/")
        fd = open_url.fileno()
        # Wrap the raw descriptor in a file object and read through it.
        FILE = os.fdopen(fd)
        try:
            self.assertTrue(FILE.read(), "reading from file created using fd "
                            "returned by fileno failed")
        finally:
            FILE.close()

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except socket.gaierror:
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        # SF patch 809915: In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site, and a
        # test against http://www.sadflkjsasadf.com/ started failing then.
        # Hence the deliberately malformed domain above.  The URL is built
        # from bogus_domain so the DNS pre-check and the request always
        # target the same host.
        self.assertRaises(IOError,
                          urllib.urlopen, "http://%s/" % bogus_domain)
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 155 | |
| 156 | class urlretrieveNetworkTests(unittest.TestCase): |
| 157 | """Tests urllib.urlretrieve using the network.""" |
| 158 | |
Neal Norwitz | 5be3067 | 2008-01-26 05:54:48 +0000 | [diff] [blame] | 159 | def urlretrieve(self, *args): |
| 160 | return _open_with_retry(urllib.urlretrieve, *args) |
| 161 | |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 162 | def test_basic(self): |
| 163 | # Test basic functionality. |
Ned Deily | c727533 | 2014-03-26 23:25:02 -0700 | [diff] [blame] | 164 | file_location,info = self.urlretrieve("http://www.example.com/") |
Benjamin Peterson | 5c8da86 | 2009-06-30 22:57:08 +0000 | [diff] [blame] | 165 | self.assertTrue(os.path.exists(file_location), "file location returned by" |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 166 | " urlretrieve is not a valid path") |
| 167 | FILE = file(file_location) |
| 168 | try: |
Benjamin Peterson | 5c8da86 | 2009-06-30 22:57:08 +0000 | [diff] [blame] | 169 | self.assertTrue(FILE.read(), "reading from the file location returned" |
Jeremy Hylton | bd9f520 | 2003-07-17 16:31:00 +0000 | [diff] [blame] | 170 | " by urlretrieve failed") |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 171 | finally: |
| 172 | FILE.close() |
| 173 | os.unlink(file_location) |
| 174 | |
| 175 | def test_specified_path(self): |
| 176 | # Make sure that specifying the location of the file to write to works. |
Ned Deily | c727533 | 2014-03-26 23:25:02 -0700 | [diff] [blame] | 177 | file_location,info = self.urlretrieve("http://www.example.com/", |
Neal Norwitz | 5be3067 | 2008-01-26 05:54:48 +0000 | [diff] [blame] | 178 | test_support.TESTFN) |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 179 | self.assertEqual(file_location, test_support.TESTFN) |
Benjamin Peterson | 5c8da86 | 2009-06-30 22:57:08 +0000 | [diff] [blame] | 180 | self.assertTrue(os.path.exists(file_location)) |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 181 | FILE = file(file_location) |
| 182 | try: |
Benjamin Peterson | 5c8da86 | 2009-06-30 22:57:08 +0000 | [diff] [blame] | 183 | self.assertTrue(FILE.read(), "reading from temporary file failed") |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 184 | finally: |
| 185 | FILE.close() |
| 186 | os.unlink(file_location) |
| 187 | |
| 188 | def test_header(self): |
| 189 | # Make sure header returned as 2nd value from urlretrieve is good. |
Ned Deily | c727533 | 2014-03-26 23:25:02 -0700 | [diff] [blame] | 190 | file_location, header = self.urlretrieve("http://www.example.com/") |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 191 | os.unlink(file_location) |
Ezio Melotti | b0f5adc | 2010-01-24 16:58:36 +0000 | [diff] [blame] | 192 | self.assertIsInstance(header, mimetools.Message, |
| 193 | "header is not an instance of mimetools.Message") |
Tim Peters | 813cec9 | 2003-05-16 15:35:10 +0000 | [diff] [blame] | 194 | |
Senthil Kumaran | 1b7f9e5 | 2010-05-01 08:01:56 +0000 | [diff] [blame] | 195 | def test_data_header(self): |
Ned Deily | c727533 | 2014-03-26 23:25:02 -0700 | [diff] [blame] | 196 | logo = "http://www.example.com/" |
Senthil Kumaran | 1b7f9e5 | 2010-05-01 08:01:56 +0000 | [diff] [blame] | 197 | file_location, fileheaders = self.urlretrieve(logo) |
| 198 | os.unlink(file_location) |
| 199 | datevalue = fileheaders.getheader('Date') |
| 200 | dateformat = '%a, %d %b %Y %H:%M:%S GMT' |
| 201 | try: |
| 202 | time.strptime(datevalue, dateformat) |
| 203 | except ValueError: |
| 204 | self.fail('Date value not in %r format', dateformat) |
| 205 | |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 206 | |
@unittest.skipIf(ssl is None, "requires ssl")
class urlopen_HttpsTests(unittest.TestCase):
    """HTTPS tests for urllib.urlopen; skipped when ssl is unavailable."""

    def test_context_argument(self):
        # Open an HTTPS URL with an explicit SSL context built from the
        # self-signed certificate file, passed via the 'context' argument.
        context = ssl.create_default_context(cafile=CERT_selfsigned_pythontestdotnet)
        response = urllib.urlopen("https://self-signed.pythontest.net", context=context)
        try:
            self.assertIn("Python", response.read())
        finally:
            # Release the connection even if the assertion fails.
            response.close()
| 214 | |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 215 | |
def test_main():
    """Run every test case class defined in this module.

    The 'network' resource is requested through test_support.requires
    first.  The test classes are then run inside
    test_support.check_py3k_warnings, which is given the urllib.urlopen
    removal message paired with DeprecationWarning.
    """
    test_support.requires('network')
    urlopen_removed = ("urllib.urlopen.. has been removed", DeprecationWarning)
    test_classes = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
        urlopen_HttpsTests,
    )
    with test_support.check_py3k_warnings(urlopen_removed):
        test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()