blob: e39fa8dfbc450cab2c05e5fa9498949aa6a80c46 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
Georg Brandl24420152008-05-26 16:32:26 +00005import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00006import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00007import io
Brett Cannon74bfd702003-04-25 09:39:47 +00008import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00009from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000010import os
Georg Brandl5a650a22005-08-26 08:51:34 +000011import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000012
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000019
# Shortcut for testing FancyURLopener
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a urllib.request.FancyURLopener.

    When *proxies* is not None a fresh opener configured with those proxies
    is used for this call and is not cached.  Otherwise one module-level
    opener is created on first use and reused by later calls.  *data* is
    passed on to the opener only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: create the shared opener.
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
36
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000114
class ProxyTests(unittest.TestCase):
    """Test urllib.request.getproxies_environment() with a controlled env."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        # (iterate over a copy of the keys: unsetting changes os.environ)
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000135
136
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The real class is saved on ``self._connection_class`` so that
        unfakehttp() can put it back.
        """
        class FakeSocket(io.BytesIO):
            # Counts the socket itself plus every makefile() handle; the
            # underlying buffer is closed only when the count reaches zero.
            io_refs = 1

            def sendall(self, data):
                # Outgoing request bytes are discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return io.BytesIO.read(self, amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return io.BytesIO.readline(self, length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    io.BytesIO.close(self)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def check_read(self, ver):
        """Check reading a 200 response whose status line uses HTTP/*ver*."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()
228
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created by the test.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        to utf8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested hook can update the expected count.
        expected_count = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count[0])
            expected_count[0] += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000366
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Raw string: '\^' is not a valid escape sequence in a normal literal.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Re-run through quote_plus() so the second assertion checks the
        # function its message names.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
569
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000570
Brett Cannon74bfd702003-04-25 09:39:47 +0000571class UnquotingTests(unittest.TestCase):
572 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000573
    See the doc string for QuotingTests for details on quoting and such.
575
576 """
577
578 def test_unquoting(self):
579 # Make sure unquoting of all ASCII values works
580 escape_list = []
581 for num in range(128):
582 given = hexescape(chr(num))
583 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000584 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000585 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000586 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000587 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000588 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000589 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000590 (expect, result))
591 escape_list.append(given)
592 escape_string = ''.join(escape_list)
593 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000594 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000595 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000596 "using unquote(): not all characters escaped: "
597 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000598 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
599 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000600 with support.check_warnings(('', BytesWarning), quiet=True):
601 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000602
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000603 def test_unquoting_badpercent(self):
604 # Test unquoting on bad percent-escapes
605 given = '%xab'
606 expect = given
607 result = urllib.parse.unquote(given)
608 self.assertEqual(expect, result, "using unquote(): %r != %r"
609 % (expect, result))
610 given = '%x'
611 expect = given
612 result = urllib.parse.unquote(given)
613 self.assertEqual(expect, result, "using unquote(): %r != %r"
614 % (expect, result))
615 given = '%'
616 expect = given
617 result = urllib.parse.unquote(given)
618 self.assertEqual(expect, result, "using unquote(): %r != %r"
619 % (expect, result))
620 # unquote_to_bytes
621 given = '%xab'
622 expect = bytes(given, 'ascii')
623 result = urllib.parse.unquote_to_bytes(given)
624 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
625 % (expect, result))
626 given = '%x'
627 expect = bytes(given, 'ascii')
628 result = urllib.parse.unquote_to_bytes(given)
629 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
630 % (expect, result))
631 given = '%'
632 expect = bytes(given, 'ascii')
633 result = urllib.parse.unquote_to_bytes(given)
634 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
635 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000636 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
637 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000638
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000639 def test_unquoting_mixed_case(self):
640 # Test unquoting on mixed-case hex digits in the percent-escapes
641 given = '%Ab%eA'
642 expect = b'\xab\xea'
643 result = urllib.parse.unquote_to_bytes(given)
644 self.assertEqual(expect, result,
645 "using unquote_to_bytes(): %r != %r"
646 % (expect, result))
647
Brett Cannon74bfd702003-04-25 09:39:47 +0000648 def test_unquoting_parts(self):
649 # Make sure unquoting works when have non-quoted characters
650 # interspersed
651 given = 'ab%sd' % hexescape('c')
652 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000653 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000654 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000655 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000656 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000657 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000658 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000659
Brett Cannon74bfd702003-04-25 09:39:47 +0000660 def test_unquoting_plus(self):
661 # Test difference between unquote() and unquote_plus()
662 given = "are+there+spaces..."
663 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000664 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000665 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000666 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000667 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000668 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000669 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000670 "using unquote_plus(): %r != %r" % (expect, result))
671
672 def test_unquote_to_bytes(self):
673 given = 'br%C3%BCckner_sapporo_20050930.doc'
674 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
675 result = urllib.parse.unquote_to_bytes(given)
676 self.assertEqual(expect, result,
677 "using unquote_to_bytes(): %r != %r"
678 % (expect, result))
679 # Test on a string with unescaped non-ASCII characters
680 # (Technically an invalid URI; expect those characters to be UTF-8
681 # encoded).
682 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
683 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
684 self.assertEqual(expect, result,
685 "using unquote_to_bytes(): %r != %r"
686 % (expect, result))
687 # Test with a bytes as input
688 given = b'%A2%D8ab%FF'
689 expect = b'\xa2\xd8ab\xff'
690 result = urllib.parse.unquote_to_bytes(given)
691 self.assertEqual(expect, result,
692 "using unquote_to_bytes(): %r != %r"
693 % (expect, result))
694 # Test with a bytes as input, with unescaped non-ASCII bytes
695 # (Technically an invalid URI; expect those bytes to be preserved)
696 given = b'%A2\xd8ab%FF'
697 expect = b'\xa2\xd8ab\xff'
698 result = urllib.parse.unquote_to_bytes(given)
699 self.assertEqual(expect, result,
700 "using unquote_to_bytes(): %r != %r"
701 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000702
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000703 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000704 # Characters in the Latin-1 range, encoded with UTF-8
705 given = 'br%C3%BCckner_sapporo_20050930.doc'
706 expect = 'br\u00fcckner_sapporo_20050930.doc'
707 result = urllib.parse.unquote(given)
708 self.assertEqual(expect, result,
709 "using unquote(): %r != %r" % (expect, result))
710 # Characters in the Latin-1 range, encoded with None (default)
711 result = urllib.parse.unquote(given, encoding=None, errors=None)
712 self.assertEqual(expect, result,
713 "using unquote(): %r != %r" % (expect, result))
714
715 # Characters in the Latin-1 range, encoded with Latin-1
716 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
717 encoding="latin-1")
718 expect = 'br\u00fcckner_sapporo_20050930.doc'
719 self.assertEqual(expect, result,
720 "using unquote(): %r != %r" % (expect, result))
721
722 # Characters in BMP, encoded with UTF-8
723 given = "%E6%BC%A2%E5%AD%97"
724 expect = "\u6f22\u5b57" # "Kanji"
725 result = urllib.parse.unquote(given)
726 self.assertEqual(expect, result,
727 "using unquote(): %r != %r" % (expect, result))
728
729 # Decode with UTF-8, invalid sequence
730 given = "%F3%B1"
731 expect = "\ufffd" # Replacement character
732 result = urllib.parse.unquote(given)
733 self.assertEqual(expect, result,
734 "using unquote(): %r != %r" % (expect, result))
735
736 # Decode with UTF-8, invalid sequence, replace errors
737 result = urllib.parse.unquote(given, errors="replace")
738 self.assertEqual(expect, result,
739 "using unquote(): %r != %r" % (expect, result))
740
741 # Decode with UTF-8, invalid sequence, ignoring errors
742 given = "%F3%B1"
743 expect = ""
744 result = urllib.parse.unquote(given, errors="ignore")
745 self.assertEqual(expect, result,
746 "using unquote(): %r != %r" % (expect, result))
747
748 # A mix of non-ASCII and percent-encoded characters, UTF-8
749 result = urllib.parse.unquote("\u6f22%C3%BC")
750 expect = '\u6f22\u00fc'
751 self.assertEqual(expect, result,
752 "using unquote(): %r != %r" % (expect, result))
753
754 # A mix of non-ASCII and percent-encoded characters, Latin-1
755 # (Note, the string contains non-Latin-1-representable characters)
756 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
757 expect = '\u6f22\u00fc'
758 self.assertEqual(expect, result,
759 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000760
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input kind; it is only
        used to build the assertion failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every pair ends and every value is a digit, so the first '&' must
        # sit between two digits whatever the order of the pairs.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is quoted as its str() representation.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element becomes its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # Verify the doseq=True result too; without this assertion the
        # call above would be made but its output never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
954
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL must survive unchanged (no unquote_plus()-style
        # conversion to a space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000997
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() on passwords with whitespace and colons.

        Some of the passwords are not sensible; they are included to
        conform to RFC2617 and to address issue4675.
        """
        # Only the first ':' separates the user from the password; what
        # follows it must come back untouched.
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for secret in passwords:
            credentials = 'user:' + secret
            self.assertEqual(('user', secret),
                             urllib.parse.splitpasswd(credentials))
1012
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001013
class URLopener_Tests(unittest.TestCase):
    """Test case for the open method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: echo the URL it was
            # given so the test can inspect what open() passed along.
            def open_spam(self, url):
                return url

        checks = (
            # a space in the URL comes back percent-encoded
            ('spam://example/ /', '//example/%20/'),
            # test the safe characters are not quoted by urlopen
            ("spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/",
             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
        )
        for address, received in checks:
            self.assertEqual(DummyURLopener().open(address), received)
1029
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1037#
1038# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001039# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001040# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1041# serv.settimeout(3)
1042# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1043# serv.bind(("", 9093))
1044# serv.listen(5)
1045# try:
1046# conn, addr = serv.accept()
1047# conn.send("1 Hola mundo\n")
1048# cantdata = 0
1049# while cantdata < 13:
1050# data = conn.recv(13-cantdata)
1051# cantdata += len(data)
1052# time.sleep(.3)
1053# conn.send("2 No more lines\n")
1054# conn.close()
1055# except socket.timeout:
1056# pass
1057# finally:
1058# serv.close()
1059# evt.set()
1060#
1061# class FTPWrapperTests(unittest.TestCase):
1062#
1063# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001064# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001065# ftplib.FTP.port = 9093
1066# self.evt = threading.Event()
1067# threading.Thread(target=server, args=(self.evt,)).start()
1068# time.sleep(.1)
1069#
1070# def tearDown(self):
1071# self.evt.wait()
1072#
1073# def testBasic(self):
1074# # connects
1075# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001076# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001077#
1078# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001079# # global default timeout is ignored
1080# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001081# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001082# socket.setdefaulttimeout(30)
1083# try:
1084# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1085# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001086# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001087# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001088# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001089#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001090# def testTimeoutDefault(self):
1091# # global default timeout is used
1092# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001093# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001094# socket.setdefaulttimeout(30)
1095# try:
1096# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1097# finally:
1098# socket.setdefaulttimeout(None)
1099# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1100# ftp.close()
1101#
1102# def testTimeoutValue(self):
1103# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1104# timeout=30)
1105# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1106# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001107
Skip Montanaro080c9972001-01-28 21:12:22 +00001108
1109
def test_main():
    """Run the enabled test case classes through support.run_unittest()."""
    enabled_cases = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*enabled_cases)
Brett Cannon74bfd702003-04-25 09:39:47 +00001124
1125
1126
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()