Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 2 | |
| 3 | import unittest |
Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 4 | from test import support |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 5 | |
| 6 | import socket |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 7 | import urllib.request |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 8 | import sys |
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 9 | import os |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 10 | import email.message |
Senthil Kumaran | f6c456d | 2010-05-01 08:29:18 +0000 | [diff] [blame] | 11 | import time |
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 12 | |
Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 13 | |
class URLTimeoutTest(unittest.TestCase):
    """Read a page from the network while a global socket timeout is set."""
    # XXX this test doesn't seem to test anything useful.

    # Value handed to socket.setdefaulttimeout() for the duration of a test.
    TIMEOUT = 30.0

    def setUp(self):
        # Remember whatever default was in force so tearDown can put it back
        # instead of unconditionally clearing it.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        """Open http://www.python.org/ and read the whole response."""
        with support.transient_internet("www.python.org"):
            f = urllib.request.urlopen("http://www.python.org/")
            try:
                f.read()
            finally:
                # Always release the underlying socket.
                f.close()
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 29 | |
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def urlopen(self, *args, **kwargs):
        """Call urllib.request.urlopen inside a transient_internet context.

        The first positional argument is used as the resource name for
        support.transient_internet().  The context manager is entered here
        and its __exit__ is registered with addCleanup (called with no
        exception information), so it is left when the test finishes.
        All arguments are forwarded unchanged to urllib.request.urlopen,
        whose return value is returned.
        """
        resource = args[0]
        cm = support.transient_internet(resource)
        cm.__enter__()
        self.addCleanup(cm.__exit__, None, None, None)
        return urllib.request.urlopen(*args, **kwargs)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.python.org/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.python.org/")
        try:
            # The response is a binary stream, so a line is a bytes object.
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.python.org/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        self.assertIsInstance(info_obj, email.message.Message,
                              "object returned by 'info' is not an "
                              "instance of email.message.Message")
        self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        # Guard the request like every other network access in this class.
        with support.transient_internet(URL):
            open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
        self.assertEqual(code, 404)

    def test_fileno(self):
        if sys.platform in ('win32',):
            # On Windows, socket handles are not file descriptors; this
            # test can't pass on Windows.  Report it as skipped rather than
            # letting it count as a silent pass.
            self.skipTest("socket handles are not file descriptors "
                          "on Windows")
        # Make sure fd returned by fileno is valid.
        open_url = self.urlopen("http://www.python.org/", timeout=None)
        fd = open_url.fileno()
        # Closing FILE also closes the descriptor obtained from the response.
        FILE = os.fdopen(fd, encoding='utf-8')
        try:
            self.assertTrue(FILE.read(), "reading from file created using fd "
                            "returned by fileno failed")
        finally:
            FILE.close()

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        #
        # SF patch 809915:  In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site.  This
        # test started failing then.  One hopes the .invalid domain will be
        # spared to serve its defined purpose.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except socket.gaierror:
            # Expected: the name does not resolve.
            pass
        else:
            # A resolver that answers for bogus names makes the assertion
            # below meaningless, so skip instead of reporting a failure.
            self.skipTest("%r should not resolve for test to work"
                          % bogus_domain)
        self.assertRaises(IOError,
                          urllib.request.urlopen,
                          "http://sadflkjsasf.i.nvali.d/")
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 135 | |
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    def urlretrieve(self, *args):
        """Call urllib.request.urlretrieve inside a transient_internet context.

        The first positional argument is used as the resource name for
        support.transient_internet().  The context manager is entered here
        and its __exit__ is registered with addCleanup (called with no
        exception information), so it is left when the test finishes.
        All arguments are forwarded unchanged to urllib.request.urlretrieve,
        whose return value is returned.
        """
        resource = args[0]
        cm = support.transient_internet(resource)
        cm.__enter__()
        self.addCleanup(cm.__exit__, None, None, None)
        return urllib.request.urlretrieve(*args)

    def test_basic(self):
        # Test basic functionality.
        file_location, info = self.urlretrieve("http://www.python.org/")
        self.assertTrue(os.path.exists(file_location), "file location returned by"
                        " urlretrieve is not a valid path")
        try:
            with open(file_location, encoding='utf-8') as FILE:
                self.assertTrue(FILE.read(), "reading from the file location returned"
                                " by urlretrieve failed")
        finally:
            # Remove the download even when the read assertion fails.
            os.unlink(file_location)

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        file_location, info = self.urlretrieve("http://www.python.org/",
                                               support.TESTFN)
        self.assertEqual(file_location, support.TESTFN)
        self.assertTrue(os.path.exists(file_location))
        try:
            with open(file_location, encoding='utf-8') as FILE:
                self.assertTrue(FILE.read(), "reading from temporary file failed")
        finally:
            # Remove the download even when the read assertion fails.
            os.unlink(file_location)

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        file_location, header = self.urlretrieve("http://www.python.org/")
        os.unlink(file_location)
        self.assertIsInstance(header, email.message.Message,
                              "header is not an instance of email.message.Message")

    def test_data_header(self):
        # Make sure the Date header of a retrieved resource parses with the
        # expected HTTP date layout.
        logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
        file_location, fileheaders = self.urlretrieve(logo)
        os.unlink(file_location)
        datevalue = fileheaders.get('Date')
        # A missing header would otherwise surface as a TypeError in strptime.
        self.assertIsNotNone(datevalue, "response carries no Date header")
        dateformat = '%a, %d %b %Y %H:%M:%S GMT'
        try:
            time.strptime(datevalue, dateformat)
        except ValueError:
            # fail() accepts a single message argument, so the format has to
            # be interpolated here rather than passed as a second argument.
            self.fail('Date value not in %r format' % dateformat)
Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 189 | |
| 190 | |
def test_main():
    """Run every test case class of this module through test.support.

    Calls support.requires('network') first, then passes the three test
    case classes to support.run_unittest().
    """
    support.requires('network')
    network_test_cases = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    support.run_unittest(*network_test_cases)
Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 196 | |
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()