blob: 275b2eb81e90e3e003a4ae88cc8f5cda959f9833 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Georg Brandl5a650a22005-08-26 08:51:34 +000012import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000013
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of the code point
    of *char*, zero-padded to at least two digits (for example ' ' -> '%20').
    """
    # "%02X" pads to two digits and upper-cases, which is exactly what the
    # manual hex()/slice/upper()/pad sequence produced.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a urllib.request.FancyURLopener.  When *proxies* is
    given, a fresh opener configured with those proxies is used for this call
    only; otherwise one module-level opener is created on first use and
    shared by later calls.  *data* is passed on to the opener's open() only
    when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # Lazily create the shared opener the first time it is needed.
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
37
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            # Register removal as soon as the file exists: tearDown() is not
            # run when setUp() fails, but registered cleanups are.
            self.addCleanup(os.remove, support.TESTFN)
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # The temp file itself is removed by the cleanup registered in
        # setUp(), which runs after this method.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000115
class ProxyTests(unittest.TestCase):
    """Test urllib.request.getproxies_environment() with a clean environment."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        # (iterate over a copy of the keys, since variables are removed
        # while looping)
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment() uses lower-cased, truncated (no '_proxy')
        # keys
        self.assertEqual('localhost', proxies['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000136
137
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The real class is saved on the instance so that unfakehttp() can
        restore it.  *fakedata* is the raw bytes the fake socket hands back
        as the server's response.
        """
        class FakeSocket(io.BytesIO):
            # Number of outstanding references to the socket; the underlying
            # buffer is only really closed once this drops to zero.
            io_refs = 1

            def sendall(self, data):
                # Whatever the client sends is discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return io.BytesIO.read(self, amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return io.BytesIO.readline(self, length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    io.BytesIO.close(self)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                # No network access: "connect" to the canned response.
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the http.client.HTTPConnection saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def check_read(self, ver):
        """Check reading a 200 response whose status line uses HTTP/*ver*."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a 302 response redirects to
        # a file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()
254
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as testfile:
            testfile.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort: a file that
        # was never created or is already gone is not an error.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        to UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # The file object takes ownership of the descriptor and closes it
        # when the with block exits, even if the write fails.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_count = 0

        def hooktester(count, block_size, total_size):
            # Each call must carry the next consecutive count, starting at 0.
            nonlocal expected_count
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count)
            expected_count += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000392
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written as an explicit escape; a bare "\^" is an
        # invalid escape sequence even though it yields the same two chars.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Actually exercise quote_plus() here; previously the quote() result
        # was checked a second time under the quote_plus() message.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
595
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000596
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes().

    See the docstring of QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # The only '%' left over must be the one decoded from '%25'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Non-string arguments must be rejected.
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they must be passed
        # through unchanged.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'  # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"  # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"  # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000786
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input type; it is only
        used in the failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key digit.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is quoted as its str() representation.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq each element gets its own "sequence=..." pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A dict used as the value contributes its keys.
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result used to be computed but never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
980
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001023
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() on passwords containing unusual characters.

        Some of these passwords are not sensible, but they are included to
        conform to RFC2617 and to address issue4675.
        """
        passwords = ['ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b']
        for password in passwords:
            user_info = 'user:' + password
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd(user_info))
1038
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001039
class URLopener_Tests(unittest.TestCase):
    """Test case for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Returns the url it is handed, so the test can inspect it.
            # NOTE(review): presumably open() dispatches 'spam:' URLs
            # here -- confirm against URLopener.open().
            def open_spam(self, url):
                return url

        # The space in the URL is expected back as %20.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        untouched = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open("spam:" + untouched)
        self.assertEqual(opened, untouched)
1055
# The FTPWrapperTests below are just commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1063#
1064# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001065# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001066# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1067# serv.settimeout(3)
1068# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1069# serv.bind(("", 9093))
1070# serv.listen(5)
1071# try:
1072# conn, addr = serv.accept()
1073# conn.send("1 Hola mundo\n")
1074# cantdata = 0
1075# while cantdata < 13:
1076# data = conn.recv(13-cantdata)
1077# cantdata += len(data)
1078# time.sleep(.3)
1079# conn.send("2 No more lines\n")
1080# conn.close()
1081# except socket.timeout:
1082# pass
1083# finally:
1084# serv.close()
1085# evt.set()
1086#
1087# class FTPWrapperTests(unittest.TestCase):
1088#
1089# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001090# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001091# ftplib.FTP.port = 9093
1092# self.evt = threading.Event()
1093# threading.Thread(target=server, args=(self.evt,)).start()
1094# time.sleep(.1)
1095#
1096# def tearDown(self):
1097# self.evt.wait()
1098#
1099# def testBasic(self):
1100# # connects
1101# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001102# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001103#
1104# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001105# # global default timeout is ignored
1106# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001107# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001108# socket.setdefaulttimeout(30)
1109# try:
1110# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1111# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001112# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001113# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001114# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001115#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001116# def testTimeoutDefault(self):
1117# # global default timeout is used
1118# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001119# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001120# socket.setdefaulttimeout(30)
1121# try:
1122# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1123# finally:
1124# socket.setdefaulttimeout(None)
1125# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1126# ftp.close()
1127#
1128# def testTimeoutValue(self):
1129# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1130# timeout=30)
1131# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1132# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001133
Skip Montanaro080c9972001-01-28 21:12:22 +00001134
1135
def test_main():
    """Run the test case classes listed below via support.run_unittest()."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)



# Run the tests when this file is executed as a script.
if __name__ == '__main__':
    test_main()