blob: 7d35f10b123d89085cc19458ae912be4ace9dd2a [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns '%' followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded on the left to at least two digits.
    """
    return "%{:02X}".format(ord(char))
Jeremy Hylton6102e292000-08-31 15:48:10 +000021
# Shortcut for testing FancyURLopener.
# Holds the opener shared by every urlopen() call made without proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a new FancyURLopener is built for this call
    only.  Otherwise a single module-level opener is created on first use
    and reused afterwards.  *data* is passed on to the opener only when it
    is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
38
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            # Register removal as soon as the file exists: cleanups still run
            # when setUp() raises (tearDown() does not), so a failing write
            # or a failing urlopen() below no longer leaves the file behind.
            self.addCleanup(os.remove, support.TESTFN)
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # The temporary file is removed afterwards by the cleanup registered
        # in setUp(); cleanups run after tearDown(), so the object is closed
        # before the file is deleted.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000117
class ProxyTests(unittest.TestCase):
    """Tests for reading proxy settings from environment variables."""

    def setUp(self):
        # Records changes to env vars
        guard = support.EnvironmentVarGuard()
        self.env = guard
        # Delete all proxy related env vars; the names are collected first
        # so os.environ is not modified while it is being iterated.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            guard.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        no_proxy = urllib.request.getproxies_environment()['no']
        self.assertEqual('localhost', no_proxy)
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000141
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Swap http.client.HTTPConnection for a fake that serves *fakedata*.

        The real class is remembered on the instance so that unfakehttp()
        can put it back.
        """
        class FakeSocket(io.BytesIO):
            # Counts the holders of this "socket"; makefile() adds one and
            # close() drops one, the buffer being closed when none remain.
            io_refs = 1

            def sendall(self, data):
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                return b"" if self.closed else io.BytesIO.read(self, amt)

            def readline(self, length=None):
                return b"" if self.closed else io.BytesIO.readline(self, length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    io.BytesIO.close(self)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def _check_hello_response(self, url):
        # Open *url* and verify the canned "Hello!" body, the reported URL
        # and the 200 status code.
        fp = urlopen(url)
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), url)
        self.assertEqual(fp.getcode(), 200)

    def check_read(self, ver):
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            self._check_hello_response("http://python.org/")
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(IOError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location is a file: URL must raise HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(urllib.error.HTTPError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(IOError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            self._check_hello_response("http://user:pass@python.org/")
        finally:
            self.unfakehttp()
258
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # 'with' closes the file on every path and, unlike the former
        # "try: close() / except: pass", does not hide unrelated errors.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created, so OSError is ignored, but
        # nothing else is swallowed.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, made absolute first.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the hook can update the expected block number
        # between calls.
        count_holder = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000396
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written as '\\' so the literal contains no
        # invalid escape sequence; the resulting string is unchanged.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Recompute with quote_plus(); previously the quote() result was
        # checked a second time under a quote_plus() message.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
599
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000600
Brett Cannon74bfd702003-04-25 09:39:47 +0000601class UnquotingTests(unittest.TestCase):
602 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000603
Brett Cannon74bfd702003-04-25 09:39:47 +0000604 See the doc string for quoting_Tests for details on quoting and such.
605
606 """
607
608 def test_unquoting(self):
609 # Make sure unquoting of all ASCII values works
610 escape_list = []
611 for num in range(128):
612 given = hexescape(chr(num))
613 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000614 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000615 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000616 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000617 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000618 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000619 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000620 (expect, result))
621 escape_list.append(given)
622 escape_string = ''.join(escape_list)
623 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000624 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000625 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000626 "using unquote(): not all characters escaped: "
627 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000628 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
629 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000630 with support.check_warnings(('', BytesWarning), quiet=True):
631 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000632
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000633 def test_unquoting_badpercent(self):
634 # Test unquoting on bad percent-escapes
635 given = '%xab'
636 expect = given
637 result = urllib.parse.unquote(given)
638 self.assertEqual(expect, result, "using unquote(): %r != %r"
639 % (expect, result))
640 given = '%x'
641 expect = given
642 result = urllib.parse.unquote(given)
643 self.assertEqual(expect, result, "using unquote(): %r != %r"
644 % (expect, result))
645 given = '%'
646 expect = given
647 result = urllib.parse.unquote(given)
648 self.assertEqual(expect, result, "using unquote(): %r != %r"
649 % (expect, result))
650 # unquote_to_bytes
651 given = '%xab'
652 expect = bytes(given, 'ascii')
653 result = urllib.parse.unquote_to_bytes(given)
654 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
655 % (expect, result))
656 given = '%x'
657 expect = bytes(given, 'ascii')
658 result = urllib.parse.unquote_to_bytes(given)
659 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
660 % (expect, result))
661 given = '%'
662 expect = bytes(given, 'ascii')
663 result = urllib.parse.unquote_to_bytes(given)
664 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
665 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000666 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
667 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000668
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000669 def test_unquoting_mixed_case(self):
670 # Test unquoting on mixed-case hex digits in the percent-escapes
671 given = '%Ab%eA'
672 expect = b'\xab\xea'
673 result = urllib.parse.unquote_to_bytes(given)
674 self.assertEqual(expect, result,
675 "using unquote_to_bytes(): %r != %r"
676 % (expect, result))
677
Brett Cannon74bfd702003-04-25 09:39:47 +0000678 def test_unquoting_parts(self):
679 # Make sure unquoting works when have non-quoted characters
680 # interspersed
681 given = 'ab%sd' % hexescape('c')
682 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000683 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000684 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000685 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000686 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000687 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000688 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000689
Brett Cannon74bfd702003-04-25 09:39:47 +0000690 def test_unquoting_plus(self):
691 # Test difference between unquote() and unquote_plus()
692 given = "are+there+spaces..."
693 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000694 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000695 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000696 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000697 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000698 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000699 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000700 "using unquote_plus(): %r != %r" % (expect, result))
701
702 def test_unquote_to_bytes(self):
703 given = 'br%C3%BCckner_sapporo_20050930.doc'
704 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
705 result = urllib.parse.unquote_to_bytes(given)
706 self.assertEqual(expect, result,
707 "using unquote_to_bytes(): %r != %r"
708 % (expect, result))
709 # Test on a string with unescaped non-ASCII characters
710 # (Technically an invalid URI; expect those characters to be UTF-8
711 # encoded).
712 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
713 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
714 self.assertEqual(expect, result,
715 "using unquote_to_bytes(): %r != %r"
716 % (expect, result))
717 # Test with a bytes as input
718 given = b'%A2%D8ab%FF'
719 expect = b'\xa2\xd8ab\xff'
720 result = urllib.parse.unquote_to_bytes(given)
721 self.assertEqual(expect, result,
722 "using unquote_to_bytes(): %r != %r"
723 % (expect, result))
724 # Test with a bytes as input, with unescaped non-ASCII bytes
725 # (Technically an invalid URI; expect those bytes to be preserved)
726 given = b'%A2\xd8ab%FF'
727 expect = b'\xa2\xd8ab\xff'
728 result = urllib.parse.unquote_to_bytes(given)
729 self.assertEqual(expect, result,
730 "using unquote_to_bytes(): %r != %r"
731 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000732
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000733 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000734 # Characters in the Latin-1 range, encoded with UTF-8
735 given = 'br%C3%BCckner_sapporo_20050930.doc'
736 expect = 'br\u00fcckner_sapporo_20050930.doc'
737 result = urllib.parse.unquote(given)
738 self.assertEqual(expect, result,
739 "using unquote(): %r != %r" % (expect, result))
740 # Characters in the Latin-1 range, encoded with None (default)
741 result = urllib.parse.unquote(given, encoding=None, errors=None)
742 self.assertEqual(expect, result,
743 "using unquote(): %r != %r" % (expect, result))
744
745 # Characters in the Latin-1 range, encoded with Latin-1
746 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
747 encoding="latin-1")
748 expect = 'br\u00fcckner_sapporo_20050930.doc'
749 self.assertEqual(expect, result,
750 "using unquote(): %r != %r" % (expect, result))
751
752 # Characters in BMP, encoded with UTF-8
753 given = "%E6%BC%A2%E5%AD%97"
754 expect = "\u6f22\u5b57" # "Kanji"
755 result = urllib.parse.unquote(given)
756 self.assertEqual(expect, result,
757 "using unquote(): %r != %r" % (expect, result))
758
759 # Decode with UTF-8, invalid sequence
760 given = "%F3%B1"
761 expect = "\ufffd" # Replacement character
762 result = urllib.parse.unquote(given)
763 self.assertEqual(expect, result,
764 "using unquote(): %r != %r" % (expect, result))
765
766 # Decode with UTF-8, invalid sequence, replace errors
767 result = urllib.parse.unquote(given, errors="replace")
768 self.assertEqual(expect, result,
769 "using unquote(): %r != %r" % (expect, result))
770
771 # Decode with UTF-8, invalid sequence, ignoring errors
772 given = "%F3%B1"
773 expect = ""
774 result = urllib.parse.unquote(given, errors="ignore")
775 self.assertEqual(expect, result,
776 "using unquote(): %r != %r" % (expect, result))
777
778 # A mix of non-ASCII and percent-encoded characters, UTF-8
779 result = urllib.parse.unquote("\u6f22%C3%BC")
780 expect = '\u6f22\u00fc'
781 self.assertEqual(expect, result,
782 "using unquote(): %r != %r" % (expect, result))
783
784 # A mix of non-ASCII and percent-encoded characters, Latin-1
785 # (Note, the string contains non-Latin-1-representable characters)
786 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
787 expect = '\u6f22\u00fc'
788 self.assertEqual(expect, result,
789 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000790
Brett Cannon74bfd702003-04-25 09:39:47 +0000791class urlencode_Tests(unittest.TestCase):
792 """Tests for urlencode()"""
793
794 def help_inputtype(self, given, test_type):
795 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000796
Brett Cannon74bfd702003-04-25 09:39:47 +0000797 'given' must lead to only the pairs:
798 * 1st, 1
799 * 2nd, 2
800 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000801
Brett Cannon74bfd702003-04-25 09:39:47 +0000802 Test cannot assume anything about order. Docs make no guarantee and
803 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000804
Brett Cannon74bfd702003-04-25 09:39:47 +0000805 """
806 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000807 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000808 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000809 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000810 "testing %s: %s not found in %s" %
811 (test_type, expected, result))
812 self.assertEqual(result.count('&'), 2,
813 "testing %s: expected 2 '&'s; got %s" %
814 (test_type, result.count('&')))
815 amp_location = result.index('&')
816 on_amp_left = result[amp_location - 1]
817 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000818 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000819 "testing %s: '&' not located in proper place in %s" %
820 (test_type, result))
821 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
822 "testing %s: "
823 "unexpected number of characters: %s != %s" %
824 (test_type, len(result), (5 * 3) + 2))
825
826 def test_using_mapping(self):
827 # Test passing in a mapping object as an argument.
828 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
829 "using dict as input type")
830
831 def test_using_sequence(self):
832 # Test passing in a sequence of two-item sequences as an argument.
833 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
834 "using sequence of two-item tuples as input")
835
836 def test_quoting(self):
837 # Make sure keys and values are quoted using quote_plus()
838 given = {"&":"="}
839 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000840 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000841 self.assertEqual(expect, result)
842 given = {"key name":"A bunch of pluses"}
843 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000844 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000845 self.assertEqual(expect, result)
846
847 def test_doseq(self):
848 # Test that passing True for 'doseq' parameter works correctly
849 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000850 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
851 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000852 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000853 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000854 for value in given["sequence"]:
855 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000856 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000857 self.assertEqual(result.count('&'), 2,
858 "Expected 2 '&'s, got %s" % result.count('&'))
859
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000860 def test_empty_sequence(self):
861 self.assertEqual("", urllib.parse.urlencode({}))
862 self.assertEqual("", urllib.parse.urlencode([]))
863
864 def test_nonstring_values(self):
865 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
866 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
867
868 def test_nonstring_seq_values(self):
869 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
870 self.assertEqual("a=None&a=a",
871 urllib.parse.urlencode({"a": [None, "a"]}, True))
872 self.assertEqual("a=a&a=b",
873 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
874
Senthil Kumarandf022da2010-07-03 17:48:22 +0000875 def test_urlencode_encoding(self):
876 # ASCII encoding. Expect %3F with errors="replace'
877 given = (('\u00a0', '\u00c1'),)
878 expect = '%3F=%3F'
879 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
880 self.assertEqual(expect, result)
881
882 # Default is UTF-8 encoding.
883 given = (('\u00a0', '\u00c1'),)
884 expect = '%C2%A0=%C3%81'
885 result = urllib.parse.urlencode(given)
886 self.assertEqual(expect, result)
887
888 # Latin-1 encoding.
889 given = (('\u00a0', '\u00c1'),)
890 expect = '%A0=%C1'
891 result = urllib.parse.urlencode(given, encoding="latin-1")
892 self.assertEqual(expect, result)
893
894 def test_urlencode_encoding_doseq(self):
895 # ASCII Encoding. Expect %3F with errors="replace'
896 given = (('\u00a0', '\u00c1'),)
897 expect = '%3F=%3F'
898 result = urllib.parse.urlencode(given, doseq=True,
899 encoding="ASCII", errors="replace")
900 self.assertEqual(expect, result)
901
902 # ASCII Encoding. On a sequence of values.
903 given = (("\u00a0", (1, "\u00c1")),)
904 expect = '%3F=1&%3F=%3F'
905 result = urllib.parse.urlencode(given, True,
906 encoding="ASCII", errors="replace")
907 self.assertEqual(expect, result)
908
909 # Utf-8
910 given = (("\u00a0", "\u00c1"),)
911 expect = '%C2%A0=%C3%81'
912 result = urllib.parse.urlencode(given, True)
913 self.assertEqual(expect, result)
914
915 given = (("\u00a0", (42, "\u00c1")),)
916 expect = '%C2%A0=42&%C2%A0=%C3%81'
917 result = urllib.parse.urlencode(given, True)
918 self.assertEqual(expect, result)
919
920 # latin-1
921 given = (("\u00a0", "\u00c1"),)
922 expect = '%A0=%C1'
923 result = urllib.parse.urlencode(given, True, encoding="latin-1")
924 self.assertEqual(expect, result)
925
926 given = (("\u00a0", (42, "\u00c1")),)
927 expect = '%A0=42&%A0=%C1'
928 result = urllib.parse.urlencode(given, True, encoding="latin-1")
929 self.assertEqual(expect, result)
930
931 def test_urlencode_bytes(self):
932 given = ((b'\xa0\x24', b'\xc1\x24'),)
933 expect = '%A0%24=%C1%24'
934 result = urllib.parse.urlencode(given)
935 self.assertEqual(expect, result)
936 result = urllib.parse.urlencode(given, True)
937 self.assertEqual(expect, result)
938
939 # Sequence of values
940 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
941 expect = '%A0%24=42&%A0%24=%C1%24'
942 result = urllib.parse.urlencode(given, True)
943 self.assertEqual(expect, result)
944
945 def test_urlencode_encoding_safe_parameter(self):
946
947 # Send '$' (\x24) as safe character
948 # Default utf-8 encoding
949
950 given = ((b'\xa0\x24', b'\xc1\x24'),)
951 result = urllib.parse.urlencode(given, safe=":$")
952 expect = '%A0$=%C1$'
953 self.assertEqual(expect, result)
954
955 given = ((b'\xa0\x24', b'\xc1\x24'),)
956 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
957 expect = '%A0$=%C1$'
958 self.assertEqual(expect, result)
959
960 # Safe parameter in sequence
961 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
962 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
963 result = urllib.parse.urlencode(given, True, safe=":$")
964 self.assertEqual(expect, result)
965
966 # Test all above in latin-1 encoding
967
968 given = ((b'\xa0\x24', b'\xc1\x24'),)
969 result = urllib.parse.urlencode(given, safe=":$",
970 encoding="latin-1")
971 expect = '%A0$=%C1$'
972 self.assertEqual(expect, result)
973
974 given = ((b'\xa0\x24', b'\xc1\x24'),)
975 expect = '%A0$=%C1$'
976 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
977 encoding="latin-1")
978
979 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
980 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
981 result = urllib.parse.urlencode(given, True, safe=":$",
982 encoding="latin-1")
983 self.assertEqual(expect, result)
984
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' must survive: url2pathname() is expected to behave like
        # unquote(), not unquote_plus().
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behaviour of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Several spellings of a drive-root URL map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1044
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() against unusual but permitted passwords.

        Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # Passwords containing whitespace/control characters or a further
        # ':' must be returned intact after the first ':' of the input.
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd('user:' + password))
1059
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001060
class URLopener_Tests(unittest.TestCase):
    """Test case for the open method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up 'spam' scheme: echo back the URL it
            # is given so the test can inspect how open() quoted it.
            def open_spam(self, url):
                return url

        # A space in the URL is expected to be percent-quoted.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        safe_chars_url = "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open(safe_chars_url)
        self.assertEqual(opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1076
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work ok, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box,
# and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1084#
1085# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001086# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001087# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1088# serv.settimeout(3)
1089# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1090# serv.bind(("", 9093))
1091# serv.listen(5)
1092# try:
1093# conn, addr = serv.accept()
1094# conn.send("1 Hola mundo\n")
1095# cantdata = 0
1096# while cantdata < 13:
1097# data = conn.recv(13-cantdata)
1098# cantdata += len(data)
1099# time.sleep(.3)
1100# conn.send("2 No more lines\n")
1101# conn.close()
1102# except socket.timeout:
1103# pass
1104# finally:
1105# serv.close()
1106# evt.set()
1107#
1108# class FTPWrapperTests(unittest.TestCase):
1109#
1110# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001111# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001112# ftplib.FTP.port = 9093
1113# self.evt = threading.Event()
1114# threading.Thread(target=server, args=(self.evt,)).start()
1115# time.sleep(.1)
1116#
1117# def tearDown(self):
1118# self.evt.wait()
1119#
1120# def testBasic(self):
1121# # connects
1122# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001123# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001124#
1125# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001126# # global default timeout is ignored
1127# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001128# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001129# socket.setdefaulttimeout(30)
1130# try:
1131# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1132# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001133# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001134# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001135# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001136#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001137# def testTimeoutDefault(self):
1138# # global default timeout is used
1139# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001140# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001141# socket.setdefaulttimeout(30)
1142# try:
1143# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1144# finally:
1145# socket.setdefaulttimeout(None)
1146# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1147# ftp.close()
1148#
1149# def testTimeoutValue(self):
1150# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1151# timeout=30)
1152# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1153# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001154
Skip Montanaro080c9972001-01-28 21:12:22 +00001155
1156
def test_main():
    """Run all test case classes of this module through support.run_unittest()."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)
Brett Cannon74bfd702003-04-25 09:39:47 +00001171
1172
1173
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()