blob: 9f24b7ad9d5aec04f2069ddfc0410fee38656564 [file] [log] [blame]
Skip Montanaro89feabc2003-03-30 04:54:24 +00001import unittest
2from test import test_support
3
4import socket
Brett Cannona71319e2003-05-14 02:18:31 +00005import urllib
Skip Montanaro89feabc2003-03-30 04:54:24 +00006import sys
Brett Cannona71319e2003-05-14 02:18:31 +00007import os
Senthil Kumaran1b7f9e52010-05-01 08:01:56 +00008import time
9
Ezio Melottia2d46532010-01-30 07:22:54 +000010mimetools = test_support.import_module("mimetools", deprecated=True)
Skip Montanaro89feabc2003-03-30 04:54:24 +000011
Neal Norwitz5be30672008-01-26 05:54:48 +000012
13def _open_with_retry(func, host, *args, **kwargs):
14 # Connecting to remote hosts is flaky. Make it more robust
15 # by retrying the connection several times.
16 for i in range(3):
17 try:
18 return func(host, *args, **kwargs)
19 except IOError, last_exc:
20 continue
21 except:
22 raise
23 raise last_exc
24
25
class URLTimeoutTest(unittest.TestCase):
    """Read a URL while a default socket timeout is in force."""

    # Default socket timeout, in seconds, installed for the duration of
    # each test.
    TIMEOUT = 10.0

    def setUp(self):
        # Remember the timeout that was active so tearDown can put it back
        # instead of unconditionally clearing it.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        f = _open_with_retry(urllib.urlopen, "http://www.example.com/")
        try:
            f.read()
        finally:
            # Release the connection even when the read fails.
            f.close()
39
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.example.com/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def urlopen(self, *args):
        # Route every open through the retry helper; remote hosts are flaky.
        return _open_with_retry(urllib.urlopen, *args)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.example.com/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.example.com/")
        try:
            self.assertIsInstance(open_url.readline(), basestring,
                                  "readline did not return a string")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.example.com/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        self.assertIsInstance(info_obj, mimetools.Message,
                              "object returned by 'info' is not an "
                              "instance of mimetools.Message")
        self.assertEqual(info_obj.getsubtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.example.com/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.example.com/XXXinvalidXXX"
        open_url = urllib.FancyURLopener().open(URL)
        try:
            code = open_url.getcode()
        finally:
            open_url.close()
        self.assertEqual(code, 404)

    @unittest.skipIf(sys.platform in ('win32',), 'not appropriate for Windows')
    @unittest.skipUnless(hasattr(os, 'fdopen'), 'os.fdopen not available')
    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        open_url = self.urlopen("http://www.example.com/")
        fd = open_url.fileno()
        FILE = os.fdopen(fd)
        try:
            self.assertTrue(FILE.read(), "reading from file created using fd "
                            "returned by fileno failed")
        finally:
            FILE.close()

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except socket.gaierror:
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        # SF patch 809915: In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site, and
        # this test started failing then, so a made-up .com host is not
        # used here.
        # The URL is built from bogus_domain so that the host opened is
        # exactly the one the DNS check above verified does not resolve.
        self.assertRaises(IOError,
                          urllib.urlopen, "http://%s/" % bogus_domain)
Brett Cannona71319e2003-05-14 02:18:31 +0000146
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.urlretrieve using the network."""

    def urlretrieve(self, *args):
        # Route every retrieval through the retry helper; remote hosts are
        # flaky.
        return _open_with_retry(urllib.urlretrieve, *args)

    def test_basic(self):
        # Test basic functionality.
        file_location, info = self.urlretrieve("http://www.example.com/")
        self.assertTrue(os.path.exists(file_location), "file location returned by"
                        " urlretrieve is not a valid path")
        FILE = open(file_location)
        try:
            self.assertTrue(FILE.read(), "reading from the file location returned"
                            " by urlretrieve failed")
        finally:
            FILE.close()
            os.unlink(file_location)

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        file_location, info = self.urlretrieve("http://www.example.com/",
                                               test_support.TESTFN)
        self.assertEqual(file_location, test_support.TESTFN)
        self.assertTrue(os.path.exists(file_location))
        FILE = open(file_location)
        try:
            self.assertTrue(FILE.read(), "reading from temporary file failed")
        finally:
            FILE.close()
            os.unlink(file_location)

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        file_location, header = self.urlretrieve("http://www.example.com/")
        os.unlink(file_location)
        self.assertIsInstance(header, mimetools.Message,
                              "header is not an instance of mimetools.Message")

    def test_data_header(self):
        # Make sure the 'Date' header of the response parses with the
        # expected date format.
        logo = "http://www.example.com/"
        file_location, fileheaders = self.urlretrieve(logo)
        os.unlink(file_location)
        datevalue = fileheaders.getheader('Date')
        dateformat = '%a, %d %b %Y %H:%M:%S GMT'
        try:
            time.strptime(datevalue, dateformat)
        except ValueError:
            # fail() accepts a single message argument, so the format string
            # has to be interpolated before the call.
            self.fail('Date value not in %r format' % dateformat)
196
Brett Cannona71319e2003-05-14 02:18:31 +0000197
198
def test_main():
    """Run all test cases of this module under the py3k-warning check.

    The 'network' resource is requested from test_support before anything
    is run.
    """
    test_support.requires('network')
    expected_warning = ("urllib.urlopen.. has been removed",
                        DeprecationWarning)
    test_cases = (URLTimeoutTest,
                  urlopenNetworkTests,
                  urlretrieveNetworkTests)
    with test_support.check_py3k_warnings(expected_warning):
        test_support.run_unittest(*test_cases)
Skip Montanaro89feabc2003-03-30 04:54:24 +0000206
# Run the whole suite when this module is executed directly as a script.
if __name__ == "__main__":
    test_main()