blob: 11e5dad5bf5aea37d4fdba0ba45b9eb7e42acb23 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Georg Brandl5a650a22005-08-26 08:51:34 +000012import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000013
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits (for example
    ``' '`` becomes ``'%20'`` and ``'\\n'`` becomes ``'%0A'``).
    """
    # %02X upper-cases and zero-pads in one step, replacing the manual
    # hex()-slice-and-pad sequence.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a new FancyURLopener is built with those
    proxies for this call only.  Otherwise a single module-level
    FancyURLopener is created on first use and reused on later calls.
    *data* is passed to the opener's open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        # Compare against the None sentinel explicitly instead of relying
        # on the truthiness of the opener object.
        if _urlopener is None:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
37
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Register removal before calling urlopen(): tearDown() is skipped
        # when setUp() raises, but registered cleanups still run.  Cleanups
        # run after tearDown(), so the returned object is closed first.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator.  Compare the complete sequence so the test also
        # fails when the iterator yields nothing at all; a bare loop of
        # per-line assertions would pass vacuously in that case.
        self.assertEqual(list(self.returned_obj.__iter__()), [self.text])
Skip Montanaro080c9972001-01-28 21:12:22 +0000115
class ProxyTests(unittest.TestCase):
    """Check proxy settings read from environment variables."""

    def setUp(self):
        # The guard records changes to env vars so tearDown() can undo them.
        guard = support.EnvironmentVarGuard()
        self.env = guard
        # Collect every proxy related variable name, then unset each one
        # through the guard.
        proxy_names = [name for name in list(os.environ)
                       if 'proxy' in name.lower()]
        for name in proxy_names:
            guard.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars, then drop the guard.
        self.env.__exit__()
        delattr(self, 'env')

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment uses lower-cased keys with the '_proxy'
        # suffix removed, so NO_PROXY is looked up as 'no'.
        no_proxy = urllib.request.getproxies_environment()['no']
        self.assertEqual('localhost', no_proxy)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000136
137
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The real class is saved on the instance so unfakehttp() can put it
        back; every call must be paired with unfakehttp().
        """
        class FakeSocket(io.BytesIO):
            # Reference count: 1 for the socket itself plus one for every
            # makefile() call; the buffer is closed when it reaches zero.
            io_refs = 1

            def sendall(self, data):
                # Outgoing request bytes are discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return super().read(amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return super().readline(length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    super().close()

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                # No network access: the "socket" replays the canned bytes.
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def check_read(self, ver):
        """Check reading a 200 response whose status line uses HTTP/*ver*."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a 302 response redirects
        # to a file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()
244
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created, so OS-level errors are ignored.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath* made absolute.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        to UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register first so the file is removed even if writing fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Schedule *fileName* for removal in tearDown()."""
        self.tempFiles.append(fileName)

    def _retrieveWithReport(self, data):
        """Retrieve a new temp file holding *data* into support.TESTFN.

        Returns the list of (count, block_size, total_size) tuples, one per
        reporthook call made by urlretrieve().
        """
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(data)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_count = 0

        def hooktester(count, block_size, total_size):
            # Each call must carry ints and a count one higher than the
            # previous call, starting at zero.
            nonlocal expected_count
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count)
            expected_count += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReport(b"")
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = self._retrieveWithReport(b"x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReport(b"x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000382
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written as '\\': a bare '\^' is an invalid
        # escape sequence.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Actually exercise quote_plus() for the second check instead of
        # re-asserting the quote() result.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
585
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000586
Brett Cannon74bfd702003-04-25 09:39:47 +0000587class UnquotingTests(unittest.TestCase):
588 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000589
Brett Cannon74bfd702003-04-25 09:39:47 +0000590 See the doc string for quoting_Tests for details on quoting and such.
591
592 """
593
594 def test_unquoting(self):
595 # Make sure unquoting of all ASCII values works
596 escape_list = []
597 for num in range(128):
598 given = hexescape(chr(num))
599 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000600 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000601 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000602 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000603 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000604 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000605 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000606 (expect, result))
607 escape_list.append(given)
608 escape_string = ''.join(escape_list)
609 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000610 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000611 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000612 "using unquote(): not all characters escaped: "
613 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000614 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
615 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000616 with support.check_warnings(('', BytesWarning), quiet=True):
617 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000618
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000619 def test_unquoting_badpercent(self):
620 # Test unquoting on bad percent-escapes
621 given = '%xab'
622 expect = given
623 result = urllib.parse.unquote(given)
624 self.assertEqual(expect, result, "using unquote(): %r != %r"
625 % (expect, result))
626 given = '%x'
627 expect = given
628 result = urllib.parse.unquote(given)
629 self.assertEqual(expect, result, "using unquote(): %r != %r"
630 % (expect, result))
631 given = '%'
632 expect = given
633 result = urllib.parse.unquote(given)
634 self.assertEqual(expect, result, "using unquote(): %r != %r"
635 % (expect, result))
636 # unquote_to_bytes
637 given = '%xab'
638 expect = bytes(given, 'ascii')
639 result = urllib.parse.unquote_to_bytes(given)
640 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
641 % (expect, result))
642 given = '%x'
643 expect = bytes(given, 'ascii')
644 result = urllib.parse.unquote_to_bytes(given)
645 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
646 % (expect, result))
647 given = '%'
648 expect = bytes(given, 'ascii')
649 result = urllib.parse.unquote_to_bytes(given)
650 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
651 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000652 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
653 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000654
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000655 def test_unquoting_mixed_case(self):
656 # Test unquoting on mixed-case hex digits in the percent-escapes
657 given = '%Ab%eA'
658 expect = b'\xab\xea'
659 result = urllib.parse.unquote_to_bytes(given)
660 self.assertEqual(expect, result,
661 "using unquote_to_bytes(): %r != %r"
662 % (expect, result))
663
Brett Cannon74bfd702003-04-25 09:39:47 +0000664 def test_unquoting_parts(self):
665 # Make sure unquoting works when have non-quoted characters
666 # interspersed
667 given = 'ab%sd' % hexescape('c')
668 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000669 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000670 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000671 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000672 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000673 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000674 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000675
Brett Cannon74bfd702003-04-25 09:39:47 +0000676 def test_unquoting_plus(self):
677 # Test difference between unquote() and unquote_plus()
678 given = "are+there+spaces..."
679 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000680 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000681 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000682 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000683 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000684 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000685 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000686 "using unquote_plus(): %r != %r" % (expect, result))
687
688 def test_unquote_to_bytes(self):
689 given = 'br%C3%BCckner_sapporo_20050930.doc'
690 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
691 result = urllib.parse.unquote_to_bytes(given)
692 self.assertEqual(expect, result,
693 "using unquote_to_bytes(): %r != %r"
694 % (expect, result))
695 # Test on a string with unescaped non-ASCII characters
696 # (Technically an invalid URI; expect those characters to be UTF-8
697 # encoded).
698 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
699 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
700 self.assertEqual(expect, result,
701 "using unquote_to_bytes(): %r != %r"
702 % (expect, result))
703 # Test with a bytes as input
704 given = b'%A2%D8ab%FF'
705 expect = b'\xa2\xd8ab\xff'
706 result = urllib.parse.unquote_to_bytes(given)
707 self.assertEqual(expect, result,
708 "using unquote_to_bytes(): %r != %r"
709 % (expect, result))
710 # Test with a bytes as input, with unescaped non-ASCII bytes
711 # (Technically an invalid URI; expect those bytes to be preserved)
712 given = b'%A2\xd8ab%FF'
713 expect = b'\xa2\xd8ab\xff'
714 result = urllib.parse.unquote_to_bytes(given)
715 self.assertEqual(expect, result,
716 "using unquote_to_bytes(): %r != %r"
717 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000718
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000719 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000720 # Characters in the Latin-1 range, encoded with UTF-8
721 given = 'br%C3%BCckner_sapporo_20050930.doc'
722 expect = 'br\u00fcckner_sapporo_20050930.doc'
723 result = urllib.parse.unquote(given)
724 self.assertEqual(expect, result,
725 "using unquote(): %r != %r" % (expect, result))
726 # Characters in the Latin-1 range, encoded with None (default)
727 result = urllib.parse.unquote(given, encoding=None, errors=None)
728 self.assertEqual(expect, result,
729 "using unquote(): %r != %r" % (expect, result))
730
731 # Characters in the Latin-1 range, encoded with Latin-1
732 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
733 encoding="latin-1")
734 expect = 'br\u00fcckner_sapporo_20050930.doc'
735 self.assertEqual(expect, result,
736 "using unquote(): %r != %r" % (expect, result))
737
738 # Characters in BMP, encoded with UTF-8
739 given = "%E6%BC%A2%E5%AD%97"
740 expect = "\u6f22\u5b57" # "Kanji"
741 result = urllib.parse.unquote(given)
742 self.assertEqual(expect, result,
743 "using unquote(): %r != %r" % (expect, result))
744
745 # Decode with UTF-8, invalid sequence
746 given = "%F3%B1"
747 expect = "\ufffd" # Replacement character
748 result = urllib.parse.unquote(given)
749 self.assertEqual(expect, result,
750 "using unquote(): %r != %r" % (expect, result))
751
752 # Decode with UTF-8, invalid sequence, replace errors
753 result = urllib.parse.unquote(given, errors="replace")
754 self.assertEqual(expect, result,
755 "using unquote(): %r != %r" % (expect, result))
756
757 # Decode with UTF-8, invalid sequence, ignoring errors
758 given = "%F3%B1"
759 expect = ""
760 result = urllib.parse.unquote(given, errors="ignore")
761 self.assertEqual(expect, result,
762 "using unquote(): %r != %r" % (expect, result))
763
764 # A mix of non-ASCII and percent-encoded characters, UTF-8
765 result = urllib.parse.unquote("\u6f22%C3%BC")
766 expect = '\u6f22\u00fc'
767 self.assertEqual(expect, result,
768 "using unquote(): %r != %r" % (expect, result))
769
770 # A mix of non-ASCII and percent-encoded characters, Latin-1
771 # (Note, the string contains non-Latin-1-representable characters)
772 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
773 expect = '\u6f22\u00fc'
774 self.assertEqual(expect, result,
775 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000776
Brett Cannon74bfd702003-04-25 09:39:47 +0000777class urlencode_Tests(unittest.TestCase):
778 """Tests for urlencode()"""
779
780 def help_inputtype(self, given, test_type):
781 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000782
Brett Cannon74bfd702003-04-25 09:39:47 +0000783 'given' must lead to only the pairs:
784 * 1st, 1
785 * 2nd, 2
786 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000787
Brett Cannon74bfd702003-04-25 09:39:47 +0000788 Test cannot assume anything about order. Docs make no guarantee and
789 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000790
Brett Cannon74bfd702003-04-25 09:39:47 +0000791 """
792 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000793 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000794 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000795 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000796 "testing %s: %s not found in %s" %
797 (test_type, expected, result))
798 self.assertEqual(result.count('&'), 2,
799 "testing %s: expected 2 '&'s; got %s" %
800 (test_type, result.count('&')))
801 amp_location = result.index('&')
802 on_amp_left = result[amp_location - 1]
803 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000804 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000805 "testing %s: '&' not located in proper place in %s" %
806 (test_type, result))
807 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
808 "testing %s: "
809 "unexpected number of characters: %s != %s" %
810 (test_type, len(result), (5 * 3) + 2))
811
812 def test_using_mapping(self):
813 # Test passing in a mapping object as an argument.
814 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
815 "using dict as input type")
816
817 def test_using_sequence(self):
818 # Test passing in a sequence of two-item sequences as an argument.
819 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
820 "using sequence of two-item tuples as input")
821
822 def test_quoting(self):
823 # Make sure keys and values are quoted using quote_plus()
824 given = {"&":"="}
825 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000826 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000827 self.assertEqual(expect, result)
828 given = {"key name":"A bunch of pluses"}
829 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000830 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000831 self.assertEqual(expect, result)
832
833 def test_doseq(self):
834 # Test that passing True for 'doseq' parameter works correctly
835 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000836 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
837 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000838 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000839 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000840 for value in given["sequence"]:
841 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000842 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000843 self.assertEqual(result.count('&'), 2,
844 "Expected 2 '&'s, got %s" % result.count('&'))
845
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000846 def test_empty_sequence(self):
847 self.assertEqual("", urllib.parse.urlencode({}))
848 self.assertEqual("", urllib.parse.urlencode([]))
849
850 def test_nonstring_values(self):
851 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
852 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
853
854 def test_nonstring_seq_values(self):
855 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
856 self.assertEqual("a=None&a=a",
857 urllib.parse.urlencode({"a": [None, "a"]}, True))
858 self.assertEqual("a=a&a=b",
859 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
860
Senthil Kumarandf022da2010-07-03 17:48:22 +0000861 def test_urlencode_encoding(self):
862 # ASCII encoding. Expect %3F with errors="replace'
863 given = (('\u00a0', '\u00c1'),)
864 expect = '%3F=%3F'
865 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
866 self.assertEqual(expect, result)
867
868 # Default is UTF-8 encoding.
869 given = (('\u00a0', '\u00c1'),)
870 expect = '%C2%A0=%C3%81'
871 result = urllib.parse.urlencode(given)
872 self.assertEqual(expect, result)
873
874 # Latin-1 encoding.
875 given = (('\u00a0', '\u00c1'),)
876 expect = '%A0=%C1'
877 result = urllib.parse.urlencode(given, encoding="latin-1")
878 self.assertEqual(expect, result)
879
880 def test_urlencode_encoding_doseq(self):
881 # ASCII Encoding. Expect %3F with errors="replace'
882 given = (('\u00a0', '\u00c1'),)
883 expect = '%3F=%3F'
884 result = urllib.parse.urlencode(given, doseq=True,
885 encoding="ASCII", errors="replace")
886 self.assertEqual(expect, result)
887
888 # ASCII Encoding. On a sequence of values.
889 given = (("\u00a0", (1, "\u00c1")),)
890 expect = '%3F=1&%3F=%3F'
891 result = urllib.parse.urlencode(given, True,
892 encoding="ASCII", errors="replace")
893 self.assertEqual(expect, result)
894
895 # Utf-8
896 given = (("\u00a0", "\u00c1"),)
897 expect = '%C2%A0=%C3%81'
898 result = urllib.parse.urlencode(given, True)
899 self.assertEqual(expect, result)
900
901 given = (("\u00a0", (42, "\u00c1")),)
902 expect = '%C2%A0=42&%C2%A0=%C3%81'
903 result = urllib.parse.urlencode(given, True)
904 self.assertEqual(expect, result)
905
906 # latin-1
907 given = (("\u00a0", "\u00c1"),)
908 expect = '%A0=%C1'
909 result = urllib.parse.urlencode(given, True, encoding="latin-1")
910 self.assertEqual(expect, result)
911
912 given = (("\u00a0", (42, "\u00c1")),)
913 expect = '%A0=42&%A0=%C1'
914 result = urllib.parse.urlencode(given, True, encoding="latin-1")
915 self.assertEqual(expect, result)
916
917 def test_urlencode_bytes(self):
918 given = ((b'\xa0\x24', b'\xc1\x24'),)
919 expect = '%A0%24=%C1%24'
920 result = urllib.parse.urlencode(given)
921 self.assertEqual(expect, result)
922 result = urllib.parse.urlencode(given, True)
923 self.assertEqual(expect, result)
924
925 # Sequence of values
926 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
927 expect = '%A0%24=42&%A0%24=%C1%24'
928 result = urllib.parse.urlencode(given, True)
929 self.assertEqual(expect, result)
930
931 def test_urlencode_encoding_safe_parameter(self):
932
933 # Send '$' (\x24) as safe character
934 # Default utf-8 encoding
935
936 given = ((b'\xa0\x24', b'\xc1\x24'),)
937 result = urllib.parse.urlencode(given, safe=":$")
938 expect = '%A0$=%C1$'
939 self.assertEqual(expect, result)
940
941 given = ((b'\xa0\x24', b'\xc1\x24'),)
942 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
943 expect = '%A0$=%C1$'
944 self.assertEqual(expect, result)
945
946 # Safe parameter in sequence
947 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
948 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
949 result = urllib.parse.urlencode(given, True, safe=":$")
950 self.assertEqual(expect, result)
951
952 # Test all above in latin-1 encoding
953
954 given = ((b'\xa0\x24', b'\xc1\x24'),)
955 result = urllib.parse.urlencode(given, safe=":$",
956 encoding="latin-1")
957 expect = '%A0$=%C1$'
958 self.assertEqual(expect, result)
959
960 given = ((b'\xa0\x24', b'\xc1\x24'),)
961 expect = '%A0$=%C1$'
962 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
963 encoding="latin-1")
964
965 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
966 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
967 result = urllib.parse.urlencode(given, True, safe=":$",
968 encoding="latin-1")
969 self.assertEqual(expect, result)
970
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL must stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001013
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # Every password, including those with whitespace characters or a
        # second colon, must come back unchanged after the split.
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd('user:' + password))
1028
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001029
class URLopener_Tests(unittest.TestCase):
    """Test case for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: hand back the URL it
            # receives so the test can inspect what open() passed along.
            def open_spam(self, url):
                return url

        # A space in the URL is expected to arrive percent-encoded.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        safe_part = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open("spam:" + safe_part)
        self.assertEqual(opened, safe_part)
1045
# The tests below are just commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another. I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1053#
1054# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001055# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001056# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1057# serv.settimeout(3)
1058# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1059# serv.bind(("", 9093))
1060# serv.listen(5)
1061# try:
1062# conn, addr = serv.accept()
1063# conn.send("1 Hola mundo\n")
1064# cantdata = 0
1065# while cantdata < 13:
1066# data = conn.recv(13-cantdata)
1067# cantdata += len(data)
1068# time.sleep(.3)
1069# conn.send("2 No more lines\n")
1070# conn.close()
1071# except socket.timeout:
1072# pass
1073# finally:
1074# serv.close()
1075# evt.set()
1076#
1077# class FTPWrapperTests(unittest.TestCase):
1078#
1079# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001080# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001081# ftplib.FTP.port = 9093
1082# self.evt = threading.Event()
1083# threading.Thread(target=server, args=(self.evt,)).start()
1084# time.sleep(.1)
1085#
1086# def tearDown(self):
1087# self.evt.wait()
1088#
1089# def testBasic(self):
1090# # connects
1091# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001092# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001093#
1094# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001095# # global default timeout is ignored
1096# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001097# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001098# socket.setdefaulttimeout(30)
1099# try:
1100# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1101# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001102# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001103# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001104# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001105#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001106# def testTimeoutDefault(self):
1107# # global default timeout is used
1108# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001109# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001110# socket.setdefaulttimeout(30)
1111# try:
1112# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1113# finally:
1114# socket.setdefaulttimeout(None)
1115# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1116# ftp.close()
1117#
1118# def testTimeoutValue(self):
1119# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1120# timeout=30)
1121# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1122# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001123
Skip Montanaro080c9972001-01-28 21:12:22 +00001124
1125
def test_main():
    """Run all test case classes of this module via support.run_unittest()."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)



# Allow running this test module directly as a script.
if __name__ == '__main__':
    test_main()