blob: c2388b8054dbfb915461fa8925660139a13c0c25 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Skip Montanaro89feabc2003-03-30 04:54:24 +00002
3import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00004from test import support
Skip Montanaro89feabc2003-03-30 04:54:24 +00005
6import socket
Jeremy Hylton1afc1692008-06-18 20:49:58 +00007import urllib.request
Skip Montanaro89feabc2003-03-30 04:54:24 +00008import sys
Brett Cannona71319e2003-05-14 02:18:31 +00009import os
Barry Warsaw820c1202008-06-12 04:06:45 +000010import email.message
Senthil Kumaranf6c456d2010-05-01 08:29:18 +000011import time
Skip Montanaro89feabc2003-03-30 04:54:24 +000012
Christian Heimesaf98da12008-01-27 15:18:18 +000013
14def _open_with_retry(func, host, *args, **kwargs):
15 # Connecting to remote hosts is flaky. Make it more robust
16 # by retrying the connection several times.
Christian Heimes061ce7f2008-01-27 15:45:24 +000017 last_exc = None
Christian Heimesaf98da12008-01-27 15:18:18 +000018 for i in range(3):
19 try:
20 return func(host, *args, **kwargs)
Christian Heimes061ce7f2008-01-27 15:45:24 +000021 except IOError as err:
22 last_exc = err
Christian Heimesaf98da12008-01-27 15:18:18 +000023 continue
24 except:
25 raise
26 raise last_exc
27
28
class URLTimeoutTest(unittest.TestCase):
    """Check that a URL can be opened and read with a default socket timeout."""

    # Default socket timeout, in seconds, installed for the duration of
    # each test.
    TIMEOUT = 10.0

    def setUp(self):
        # Remember whatever default was active so tearDown can restore it
        # rather than unconditionally resetting it to None.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        f = _open_with_retry(urllib.request.urlopen, "http://www.python.org/")
        try:
            f.read()
        finally:
            # Always release the connection, even if the read fails.
            f.close()
42
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlopen using the network.

    These tests are not exhaustive.  It is assumed that the tests using
    files do a good job overall of covering the basic interface features.
    There are no tests exercising the optional 'data' and 'proxies'
    arguments.  No tests for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant
    minimizing unneeded connections.

    """

    def urlopen(self, *args, **kwargs):
        """Open a URL with urllib.request.urlopen via the retry helper."""
        return _open_with_retry(urllib.request.urlopen, *args, **kwargs)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.python.org/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.python.org/")
        try:
            self.assertIsInstance(open_url.readline(), bytes,
                                  "readline did not return bytes")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.python.org/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        self.assertIsInstance(info_obj, email.message.Message,
                              "object returned by 'info' is not an "
                              "instance of email.message.Message")
        self.assertEqual(info_obj.get_content_subtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        open_url = urllib.request.FancyURLopener().open(URL)
        try:
            code = open_url.getcode()
        finally:
            open_url.close()
        self.assertEqual(code, 404)

    def test_fileno(self):
        # Make sure fd returned by fileno is valid.
        if sys.platform in ('win32',):
            # On Windows, socket handles are not file descriptors; this
            # test can't pass on Windows.  Report it as skipped instead of
            # silently counting it as a pass.
            self.skipTest("socket handles are not file descriptors "
                          "on Windows")
        open_url = self.urlopen("http://www.python.org/")
        try:
            # Read through a duplicate of the descriptor so that closing
            # FILE does not close the descriptor open_url still owns.
            FILE = os.fdopen(os.dup(open_url.fileno()), encoding='utf-8')
            try:
                self.assertTrue(FILE.read(), "reading from file created "
                                "using fd returned by fileno failed")
            finally:
                FILE.close()
        finally:
            open_url.close()

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        #
        # SF patch 809915: In Sep 2003, VeriSign started highjacking invalid
        # .com and .net addresses to boost traffic to their own site.  This
        # test started failing then, so a bogus host name outside .com and
        # .net is used instead.
        self.assertRaises(IOError,
                          urllib.request.urlopen,
                          "http://sadflkjsasf.i.nvali.d/")
Brett Cannona71319e2003-05-14 02:18:31 +0000144
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.request.urlretrieve using the network."""

    def urlretrieve(self, *args, **kwargs):
        """Call urllib.request.urlretrieve via the retry helper."""
        return _open_with_retry(urllib.request.urlretrieve, *args, **kwargs)

    def test_basic(self):
        # Test basic functionality.
        file_location, info = self.urlretrieve("http://www.python.org/")
        try:
            self.assertTrue(os.path.exists(file_location),
                            "file location returned by"
                            " urlretrieve is not a valid path")
            with open(file_location, encoding='utf-8') as FILE:
                self.assertTrue(FILE.read(),
                                "reading from the file location returned"
                                " by urlretrieve failed")
        finally:
            # Remove the download even when one of the checks above fails.
            if os.path.exists(file_location):
                os.unlink(file_location)

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        file_location, info = self.urlretrieve("http://www.python.org/",
                                               support.TESTFN)
        try:
            self.assertEqual(file_location, support.TESTFN)
            self.assertTrue(os.path.exists(file_location))
            with open(file_location, encoding='utf-8') as FILE:
                self.assertTrue(FILE.read(),
                                "reading from temporary file failed")
        finally:
            # Remove the download even when one of the checks above fails.
            if os.path.exists(file_location):
                os.unlink(file_location)

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        file_location, header = self.urlretrieve("http://www.python.org/")
        os.unlink(file_location)
        self.assertIsInstance(header, email.message.Message,
                              "header is not an instance of "
                              "email.message.Message")

    def test_data_header(self):
        # Make sure the Date header of a retrieved resource is well formed.
        logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
        file_location, fileheaders = self.urlretrieve(logo)
        os.unlink(file_location)
        datevalue = fileheaders.get('Date')
        # A missing header would otherwise reach strptime as None and raise
        # TypeError rather than a test failure.
        self.assertIsNotNone(datevalue, "response has no Date header")
        dateformat = '%a, %d %b %Y %H:%M:%S GMT'
        try:
            time.strptime(datevalue, dateformat)
        except ValueError:
            # TestCase.fail() accepts a single message, so the format is
            # interpolated here rather than passed as a second argument.
            self.fail('Date value not in %r format' % dateformat)
Brett Cannona71319e2003-05-14 02:18:31 +0000194
195
def test_main():
    """Run all test cases of this module; needs the 'network' resource."""
    support.requires('network')
    test_cases = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    support.run_unittest(*test_cases)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()