| Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python | 
 | 2 |  | 
 | 3 | import unittest | 
| Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 4 | from test import support | 
| Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 5 |  | 
 | 6 | import socket | 
| Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 7 | import urllib.request | 
| Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 8 | import sys | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 9 | import os | 
| Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 10 | import email.message | 
| Senthil Kumaran | 5a3bc65 | 2010-05-01 08:32:23 +0000 | [diff] [blame] | 11 | import time | 
| Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 12 |  | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 13 |  | 
class URLTimeoutTest(unittest.TestCase):
    """Check that urlopen can fetch a page while a default socket timeout is set."""

    # Default socket timeout, in seconds, installed around every test.
    TIMEOUT = 10.0

    def setUp(self):
        # Remember the default that was active before the test so tearDown
        # can restore it instead of unconditionally clearing it.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        # Falls back to None (the previous hard-coded value) if a subclass
        # replaced setUp without recording the old timeout.
        socket.setdefaulttimeout(getattr(self, "_saved_timeout", None))

    def testURLread(self):
        # Keep both the connection and the read inside transient_internet so
        # a flaky network during either step is handled by the helper, and
        # always close the response so the socket is not leaked.
        with support.transient_internet("www.python.org"):
            f = urllib.request.urlopen("http://www.python.org/")
            try:
                f.read()
            finally:
                f.close()
 | 28 |  | 
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def urlopen(self, *args, **kwargs):
        """Open args[0] with urllib.request.urlopen under transient_internet.

        All positional and keyword arguments are forwarded unchanged; the
        first positional argument (the URL) doubles as the resource name
        handed to support.transient_internet.
        """
        resource = args[0]
        with support.transient_internet(resource):
            return urllib.request.urlopen(*args, **kwargs)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.python.org/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.python.org/")
        try:
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.python.org/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        # The checks run after the try/finally: if info() itself raised,
        # info_obj would be unbound and asserting inside 'finally' would hide
        # the real error behind a NameError.
        self.assertIsInstance(info_obj, email.message.Message,
                              "object returned by 'info' is not an instance "
                              "of email.message.Message")
        self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        # Guarded like every other network access in this class so that
        # connectivity problems go through support.transient_internet.
        with support.transient_internet(URL):
            open_url = urllib.request.FancyURLopener().open(URL)
            try:
                code = open_url.getcode()
            finally:
                open_url.close()
        self.assertEqual(code, 404)

    def test_fileno(self):
        if sys.platform == 'win32':
            # On Windows, socket handles are not file descriptors; this
            # test can't pass on Windows.  Report it as skipped rather than
            # letting it count as a silent pass.
            self.skipTest("socket handles are not file descriptors on Windows")
        # Make sure fd returned by fileno is valid.
        # NOTE(review): timeout=None is passed explicitly, presumably so the
        # socket stays blocking and can be read through a plain file object
        # despite any default timeout - confirm.
        open_url = self.urlopen("http://www.python.org/", timeout=None)
        fd = open_url.fileno()
        # Closing the file object created by fdopen also closes fd.
        with os.fdopen(fd, encoding='utf-8') as FILE:
            self.assertTrue(FILE.read(), "reading from file created using fd "
                                         "returned by fileno failed")

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        #
        # SF patch 809915:  In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site.  This
        # test started failing then (it used to open
        # "http://www.sadflkjsasadf.com/").  One hopes the .invalid domain
        # will be spared to serve its defined purpose.
        self.assertRaises(IOError,
                          urllib.request.urlopen,
                          "http://sadflkjsasf.i.nvali.d/")
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 132 |  | 
 | 133 | class urlretrieveNetworkTests(unittest.TestCase): | 
| Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 134 |     """Tests urllib.request.urlretrieve using the network.""" | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 135 |  | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 136 |     def urlretrieve(self, *args): | 
| Senthil Kumaran | 378e6db | 2010-10-17 11:01:46 +0000 | [diff] [blame] | 137 |         resource = args[0] | 
 | 138 |         with support.transient_internet(resource): | 
 | 139 |             return urllib.request.urlretrieve(*args) | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 140 |  | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 141 |     def test_basic(self): | 
 | 142 |         # Test basic functionality. | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 143 |         file_location,info = self.urlretrieve("http://www.python.org/") | 
| Georg Brandl | ab91fde | 2009-08-13 08:51:18 +0000 | [diff] [blame] | 144 |         self.assertTrue(os.path.exists(file_location), "file location returned by" | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 145 |                         " urlretrieve is not a valid path") | 
| Amaury Forgeot d'Arc | bdbddf8 | 2008-08-01 00:06:49 +0000 | [diff] [blame] | 146 |         FILE = open(file_location, encoding='utf-8') | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 147 |         try: | 
| Georg Brandl | ab91fde | 2009-08-13 08:51:18 +0000 | [diff] [blame] | 148 |             self.assertTrue(FILE.read(), "reading from the file location returned" | 
| Jeremy Hylton | bd9f520 | 2003-07-17 16:31:00 +0000 | [diff] [blame] | 149 |                          " by urlretrieve failed") | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 150 |         finally: | 
 | 151 |             FILE.close() | 
 | 152 |             os.unlink(file_location) | 
 | 153 |  | 
 | 154 |     def test_specified_path(self): | 
 | 155 |         # Make sure that specifying the location of the file to write to works. | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 156 |         file_location,info = self.urlretrieve("http://www.python.org/", | 
| Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 157 |                                               support.TESTFN) | 
 | 158 |         self.assertEqual(file_location, support.TESTFN) | 
| Georg Brandl | ab91fde | 2009-08-13 08:51:18 +0000 | [diff] [blame] | 159 |         self.assertTrue(os.path.exists(file_location)) | 
| Amaury Forgeot d'Arc | bdbddf8 | 2008-08-01 00:06:49 +0000 | [diff] [blame] | 160 |         FILE = open(file_location, encoding='utf-8') | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 161 |         try: | 
| Georg Brandl | ab91fde | 2009-08-13 08:51:18 +0000 | [diff] [blame] | 162 |             self.assertTrue(FILE.read(), "reading from temporary file failed") | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 163 |         finally: | 
 | 164 |             FILE.close() | 
 | 165 |             os.unlink(file_location) | 
 | 166 |  | 
 | 167 |     def test_header(self): | 
 | 168 |         # Make sure header returned as 2nd value from urlretrieve is good. | 
| Christian Heimes | af98da1 | 2008-01-27 15:18:18 +0000 | [diff] [blame] | 169 |         file_location, header = self.urlretrieve("http://www.python.org/") | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 170 |         os.unlink(file_location) | 
| Georg Brandl | ab91fde | 2009-08-13 08:51:18 +0000 | [diff] [blame] | 171 |         self.assertTrue(isinstance(header, email.message.Message), | 
| Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 172 |                      "header is not an instance of email.message.Message") | 
| Tim Peters | 813cec9 | 2003-05-16 15:35:10 +0000 | [diff] [blame] | 173 |  | 
| Senthil Kumaran | 5a3bc65 | 2010-05-01 08:32:23 +0000 | [diff] [blame] | 174 |     def test_data_header(self): | 
 | 175 |         logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png" | 
 | 176 |         file_location, fileheaders = self.urlretrieve(logo) | 
 | 177 |         os.unlink(file_location) | 
 | 178 |         datevalue = fileheaders.get('Date') | 
 | 179 |         dateformat = '%a, %d %b %Y %H:%M:%S GMT' | 
 | 180 |         try: | 
 | 181 |             time.strptime(datevalue, dateformat) | 
 | 182 |         except ValueError: | 
 | 183 |             self.fail('Date value not in %r format', dateformat) | 
| Brett Cannon | a71319e | 2003-05-14 02:18:31 +0000 | [diff] [blame] | 184 |  | 
 | 185 |  | 
def test_main():
    """Require the 'network' resource, then run every test class in this module."""
    support.requires('network')
    test_classes = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    support.run_unittest(*test_classes)
| Skip Montanaro | 89feabc | 2003-03-30 04:54:24 +0000 | [diff] [blame] | 191 |  | 
 | 192 | if __name__ == "__main__": | 
 | 193 |     test_main() |