blob: 074d833ca47459d8855cda3b9e26b77d8a9c2bc9 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
Georg Brandl24420152008-05-26 16:32:26 +00005import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00006import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00007import io
Brett Cannon74bfd702003-04-25 09:39:47 +00008import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00009from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000010import os
Georg Brandl5a650a22005-08-26 08:51:34 +000011import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000012
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of ``ord(char)``,
    zero-padded on the left to at least two digits.
    """
    # "%02X" pads single-digit values with a leading zero and leaves wider
    # values (code points above 0xFF) untouched.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000019
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a urllib.request.FancyURLopener.  When *proxies* is
    given, a fresh opener configured with those proxies is used for this
    call only; otherwise a single module-level opener is created on first
    use and reused afterwards.  *data* is passed on to the opener only when
    it is not None.
    """
    global _urlopener
    if proxies is not None:
        # A proxy-specific opener is deliberately not cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = urllib.request.FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
36
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        # The context manager closes the file even if the write fails.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000114
class ProxyTests(unittest.TestCase):
    """Test urllib.request.getproxies_environment() with a clean environment."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  Iterate over a snapshot of the
        # keys because unsetting a variable mutates os.environ.
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000135
136
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000137class urlopen_HttpTests(unittest.TestCase):
138 """Test urlopen() opening a fake http connection."""
139
140 def fakehttp(self, fakedata):
Jeremy Hylton66dc8c52007-08-04 03:42:26 +0000141 class FakeSocket(io.BytesIO):
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000142 io_refs = 1
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000143 def sendall(self, str): pass
Nick Coghlan598c3a82009-02-08 04:01:00 +0000144 def makefile(self, *args, **kwds):
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000145 self.io_refs += 1
Nick Coghlan598c3a82009-02-08 04:01:00 +0000146 return self
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000147 def read(self, amt=None):
Jeremy Hylton66dc8c52007-08-04 03:42:26 +0000148 if self.closed: return b""
149 return io.BytesIO.read(self, amt)
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000150 def readline(self, length=None):
Jeremy Hylton66dc8c52007-08-04 03:42:26 +0000151 if self.closed: return b""
152 return io.BytesIO.readline(self, length)
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000153 def close(self):
154 self.io_refs -= 1
155 if self.io_refs == 0:
156 io.BytesIO.close(self)
Georg Brandl24420152008-05-26 16:32:26 +0000157 class FakeHTTPConnection(http.client.HTTPConnection):
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000158 def connect(self):
159 self.sock = FakeSocket(fakedata)
Georg Brandl24420152008-05-26 16:32:26 +0000160 self._connection_class = http.client.HTTPConnection
161 http.client.HTTPConnection = FakeHTTPConnection
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000162
163 def unfakehttp(self):
Georg Brandl24420152008-05-26 16:32:26 +0000164 http.client.HTTPConnection = self._connection_class
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000165
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000166 def check_read(self, ver):
167 self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000168 try:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000169 fp = urlopen("http://python.org/")
Jeremy Hylton66dc8c52007-08-04 03:42:26 +0000170 self.assertEqual(fp.readline(), b"Hello!")
171 self.assertEqual(fp.readline(), b"")
Christian Heimes9bd667a2008-01-20 15:14:11 +0000172 self.assertEqual(fp.geturl(), 'http://python.org/')
173 self.assertEqual(fp.getcode(), 200)
Hye-Shik Chang39aef792004-06-05 13:30:56 +0000174 finally:
175 self.unfakehttp()
176
Senthil Kumarand91ffca2011-03-19 17:25:27 +0800177 def test_willclose(self):
178 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
179 resp = urlopen("http://www.python.org")
180 self.assertTrue(resp.fp.will_close)
181
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000182 def test_read_0_9(self):
183 # "0.9" response accepted (but not "simple responses" without
184 # a status line)
185 self.check_read(b"0.9")
186
187 def test_read_1_0(self):
188 self.check_read(b"1.0")
189
190 def test_read_1_1(self):
191 self.check_read(b"1.1")
192
Christian Heimes57dddfb2008-01-02 18:30:52 +0000193 def test_read_bogus(self):
194 # urlopen() should raise IOError for many error codes.
195 self.fakehttp(b'''HTTP/1.1 401 Authentication Required
196Date: Wed, 02 Jan 2008 03:03:54 GMT
197Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
198Connection: close
199Content-Type: text/html; charset=iso-8859-1
200''')
201 try:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000202 self.assertRaises(IOError, urlopen, "http://python.org/")
Christian Heimes57dddfb2008-01-02 18:30:52 +0000203 finally:
204 self.unfakehttp()
205
Guido van Rossumd8faa362007-04-27 19:54:29 +0000206 def test_empty_socket(self):
Jeremy Hylton66dc8c52007-08-04 03:42:26 +0000207 # urlopen() raises IOError if the underlying socket does not send any
208 # data. (#1680230)
Christian Heimes57dddfb2008-01-02 18:30:52 +0000209 self.fakehttp(b'')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000210 try:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000211 self.assertRaises(IOError, urlopen, "http://something")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000212 finally:
213 self.unfakehttp()
214
Senthil Kumarande0eb242010-08-01 17:53:37 +0000215 def test_userpass_inurl(self):
Antoine Pitrou988dbd72010-12-17 17:35:56 +0000216 self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
Senthil Kumarande0eb242010-08-01 17:53:37 +0000217 try:
218 fp = urlopen("http://user:pass@python.org/")
219 self.assertEqual(fp.readline(), b"Hello!")
220 self.assertEqual(fp.readline(), b"")
221 self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
222 self.assertEqual(fp.getcode(), 200)
223 finally:
224 self.unfakehttp()
225
Brett Cannon19691362003-04-29 05:08:06 +0000226class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000227 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000228
Brett Cannon19691362003-04-29 05:08:06 +0000229 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000230 # Create a list of temporary files. Each item in the list is a file
231 # name (absolute path or relative to the current working directory).
232 # All files in this list will be deleted in the tearDown method. Note,
233 # this only helps to makes sure temporary files get deleted, but it
234 # does nothing about trying to close files that may still be open. It
235 # is the responsibility of the developer to properly close files even
236 # when exceptional conditions occur.
237 self.tempFiles = []
238
Brett Cannon19691362003-04-29 05:08:06 +0000239 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000240 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000241 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000242 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000243 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000244 FILE.write(self.text)
245 FILE.close()
246 finally:
247 try: FILE.close()
248 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000249
250 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000251 # Delete the temporary files.
252 for each in self.tempFiles:
253 try: os.remove(each)
254 except: pass
255
256 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000257 filePath = os.path.abspath(filePath)
258 try:
259 filePath.encode("utf8")
260 except UnicodeEncodeError:
261 raise unittest.SkipTest("filePath is not encodable to utf8")
262 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000263
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000264 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000265 """Creates a new temporary file containing the specified data,
266 registers the file for deletion during the test fixture tear down, and
267 returns the absolute path of the file."""
268
269 newFd, newFilePath = tempfile.mkstemp()
270 try:
271 self.registerFileForCleanUp(newFilePath)
272 newFile = os.fdopen(newFd, "wb")
273 newFile.write(data)
274 newFile.close()
275 finally:
276 try: newFile.close()
277 except: pass
278 return newFilePath
279
280 def registerFileForCleanUp(self, fileName):
281 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000282
283 def test_basic(self):
284 # Make sure that a local file just gets its own location returned and
285 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000286 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000287 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000288 self.assertIsInstance(result[1], email.message.Message,
289 "did not get a email.message.Message instance "
290 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000291
292 def test_copy(self):
293 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000294 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000295 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000296 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000297 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000298 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000299 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000300 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000301 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000302 try:
303 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000304 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000305 finally:
306 try: FILE.close()
307 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000308 self.assertEqual(self.text, text)
309
310 def test_reporthook(self):
311 # Make sure that the reporthook works.
312 def hooktester(count, block_size, total_size, count_holder=[0]):
Ezio Melottie9615932010-01-24 19:26:24 +0000313 self.assertIsInstance(count, int)
314 self.assertIsInstance(block_size, int)
315 self.assertIsInstance(total_size, int)
Brett Cannon19691362003-04-29 05:08:06 +0000316 self.assertEqual(count, count_holder[0])
317 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000318 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000319 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000320 urllib.request.urlretrieve(
321 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000322 second_temp, hooktester)
323
324 def test_reporthook_0_bytes(self):
325 # Test on zero length file. Should call reporthook only 1 time.
326 report = []
327 def hooktester(count, block_size, total_size, _report=report):
328 _report.append((count, block_size, total_size))
329 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000330 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000331 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000332 self.assertEqual(len(report), 1)
333 self.assertEqual(report[0][2], 0)
334
335 def test_reporthook_5_bytes(self):
336 # Test on 5 byte file. Should call reporthook only 2 times (once when
337 # the "network connection" is established and once when the block is
338 # read). Since the block size is 8192 bytes, only one block read is
339 # required to read the entire file.
340 report = []
341 def hooktester(count, block_size, total_size, _report=report):
342 _report.append((count, block_size, total_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000343 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000344 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000345 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000346 self.assertEqual(len(report), 2)
347 self.assertEqual(report[0][1], 8192)
348 self.assertEqual(report[0][2], 5)
349
350 def test_reporthook_8193_bytes(self):
351 # Test on 8193 byte file. Should call reporthook only 3 times (once
352 # when the "network connection" is established, once for the next 8192
353 # bytes, and once for the last byte).
354 report = []
355 def hooktester(count, block_size, total_size, _report=report):
356 _report.append((count, block_size, total_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000357 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000358 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000359 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000360 self.assertEqual(len(report), 3)
361 self.assertEqual(report[0][1], 8192)
362 self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000363
Brett Cannon74bfd702003-04-25 09:39:47 +0000364class QuotingTests(unittest.TestCase):
365 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000366
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000367 According to RFC 2396 (Uniform Resource Identifiers), to escape a
368 character you write it as '%' + <2 character US-ASCII hex value>.
369 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
370 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000371
372 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000373
Brett Cannon74bfd702003-04-25 09:39:47 +0000374 Reserved characters : ";/?:@&=+$,"
375 Have special meaning in URIs and must be escaped if not being used for
376 their special meaning
377 Data characters : letters, digits, and "-_.!~*'()"
378 Unreserved and do not need to be escaped; can be, though, if desired
379 Control characters : 0x00 - 0x1F, 0x7F
380 Have no use in URIs so must be escaped
381 space : 0x20
382 Must be escaped
383 Delimiters : '<>#%"'
384 Must be escaped
385 Unwise : "{}|\^[]`"
386 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000387
Brett Cannon74bfd702003-04-25 09:39:47 +0000388 """
389
390 def test_never_quote(self):
391 # Make sure quote() does not quote letters, digits, and "_,.-"
392 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
393 "abcdefghijklmnopqrstuvwxyz",
394 "0123456789",
395 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000396 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000397 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000398 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000399 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000400 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000401 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000402
403 def test_default_safe(self):
404 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000405 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000406
407 def test_safe(self):
408 # Test setting 'safe' parameter does what it should do
409 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000410 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000411 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000412 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000413 result = urllib.parse.quote_plus(quote_by_default,
414 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000415 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000416 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000417 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000418 # Safe expressed as bytes rather than str
419 result = urllib.parse.quote(quote_by_default, safe=b"<>")
420 self.assertEqual(quote_by_default, result,
421 "using quote(): %r != %r" % (quote_by_default, result))
422 # "Safe" non-ASCII characters should have no effect
423 # (Since URIs are not allowed to have non-ASCII characters)
424 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
425 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
426 self.assertEqual(expect, result,
427 "using quote(): %r != %r" %
428 (expect, result))
429 # Same as above, but using a bytes rather than str
430 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
431 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
432 self.assertEqual(expect, result,
433 "using quote(): %r != %r" %
434 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000435
436 def test_default_quoting(self):
437 # Make sure all characters that should be quoted are by default sans
438 # space (separate test for that).
439 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
440 should_quote.append('<>#%"{}|\^[]`')
441 should_quote.append(chr(127)) # For 0x7F
442 should_quote = ''.join(should_quote)
443 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000444 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000445 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000446 "using quote(): "
447 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000448 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000449 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000450 self.assertEqual(hexescape(char), result,
451 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000452 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000453 (char, hexescape(char), result))
454 del should_quote
455 partial_quote = "ab[]cd"
456 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000457 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000458 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000459 "using quote(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000460 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000461 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000462
463 def test_quoting_space(self):
464 # Make sure quote() and quote_plus() handle spaces as specified in
465 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000466 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000467 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000468 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000469 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000470 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000471 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000472 given = "a b cd e f"
473 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000474 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000475 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000476 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000477 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000478 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000479 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000480 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000481
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000482 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000483 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000484 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000485 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000486 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000487 # Test with bytes
488 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
489 'alpha%2Bbeta+gamma')
490 # Test with safe bytes
491 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
492 'alpha+beta+gamma')
493
494 def test_quote_bytes(self):
495 # Bytes should quote directly to percent-encoded values
496 given = b"\xa2\xd8ab\xff"
497 expect = "%A2%D8ab%FF"
498 result = urllib.parse.quote(given)
499 self.assertEqual(expect, result,
500 "using quote(): %r != %r" % (expect, result))
501 # Encoding argument should raise type error on bytes input
502 self.assertRaises(TypeError, urllib.parse.quote, given,
503 encoding="latin-1")
504 # quote_from_bytes should work the same
505 result = urllib.parse.quote_from_bytes(given)
506 self.assertEqual(expect, result,
507 "using quote_from_bytes(): %r != %r"
508 % (expect, result))
509
510 def test_quote_with_unicode(self):
511 # Characters in Latin-1 range, encoded by default in UTF-8
512 given = "\xa2\xd8ab\xff"
513 expect = "%C2%A2%C3%98ab%C3%BF"
514 result = urllib.parse.quote(given)
515 self.assertEqual(expect, result,
516 "using quote(): %r != %r" % (expect, result))
517 # Characters in Latin-1 range, encoded by with None (default)
518 result = urllib.parse.quote(given, encoding=None, errors=None)
519 self.assertEqual(expect, result,
520 "using quote(): %r != %r" % (expect, result))
521 # Characters in Latin-1 range, encoded with Latin-1
522 given = "\xa2\xd8ab\xff"
523 expect = "%A2%D8ab%FF"
524 result = urllib.parse.quote(given, encoding="latin-1")
525 self.assertEqual(expect, result,
526 "using quote(): %r != %r" % (expect, result))
527 # Characters in BMP, encoded by default in UTF-8
528 given = "\u6f22\u5b57" # "Kanji"
529 expect = "%E6%BC%A2%E5%AD%97"
530 result = urllib.parse.quote(given)
531 self.assertEqual(expect, result,
532 "using quote(): %r != %r" % (expect, result))
533 # Characters in BMP, encoded with Latin-1
534 given = "\u6f22\u5b57"
535 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
536 encoding="latin-1")
537 # Characters in BMP, encoded with Latin-1, with replace error handling
538 given = "\u6f22\u5b57"
539 expect = "%3F%3F" # "??"
540 result = urllib.parse.quote(given, encoding="latin-1",
541 errors="replace")
542 self.assertEqual(expect, result,
543 "using quote(): %r != %r" % (expect, result))
544 # Characters in BMP, Latin-1, with xmlcharref error handling
545 given = "\u6f22\u5b57"
546 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
547 result = urllib.parse.quote(given, encoding="latin-1",
548 errors="xmlcharrefreplace")
549 self.assertEqual(expect, result,
550 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000551
Georg Brandlfaf41492009-05-26 18:31:11 +0000552 def test_quote_plus_with_unicode(self):
553 # Encoding (latin-1) test for quote_plus
554 given = "\xa2\xd8 \xff"
555 expect = "%A2%D8+%FF"
556 result = urllib.parse.quote_plus(given, encoding="latin-1")
557 self.assertEqual(expect, result,
558 "using quote_plus(): %r != %r" % (expect, result))
559 # Errors test for quote_plus
560 given = "ab\u6f22\u5b57 cd"
561 expect = "ab%3F%3F+cd"
562 result = urllib.parse.quote_plus(given, encoding="latin-1",
563 errors="replace")
564 self.assertEqual(expect, result,
565 "using quote_plus(): %r != %r" % (expect, result))
566
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000567
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        # Unquote all 128 escapes in one go; the only '%' allowed to remain
        # is the literal one decoded from '%25'.
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Non-string arguments must be rejected.
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they must come back unchanged
        malformed = ('%xab', '%x', '%')
        for given in malformed:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in malformed:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        # unquote() leaves '+' alone ...
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # ... while unquote_plus() turns every '+' into a space.
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000757
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input; it is only used
        in the failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every value is a single digit and every key starts with a digit,
        # so the first '&' must sit between two digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
        # Without doseq the list is quoted as its str() representation.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element gets its own 'sequence=<value>' pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This assertion was missing: the result used to be computed and then
        # silently discarded, so the doseq/latin-1 case verified nothing.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
951
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the quoted URL back must give the original path again.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # A space must be quoted the way quote() does it.
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' must survive url2pathname() untouched (it is not a space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000994
class Utility_Tests(unittest.TestCase):
    """Test case for the assorted utility functions of urllib."""

    def test_splitpasswd(self):
        """Some of the password examples are not sensible, but they are here
        to conform to RFC 2617 and to address issue4675.
        """
        # Passwords may contain any kind of whitespace and even further
        # colons; only the first ':' separates the user from the password.
        passwords = ['ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b']
        for password in passwords:
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd('user:' + password))
1009
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001010
class URLopener_Tests(unittest.TestCase):
    """Test case for the open() method of the URLopener class."""

    def test_quoted_open(self):
        # A subclass with a handler for the made-up 'spam' scheme; the
        # handler simply hands back the URL it was given by open().
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        # The space must reach the handler quoted as %20.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        untouched = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open("spam:" + untouched)
        self.assertEqual(opened, untouched)
1025 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1026
# The tests below are commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1034#
1035# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001036# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001037# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1038# serv.settimeout(3)
1039# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1040# serv.bind(("", 9093))
1041# serv.listen(5)
1042# try:
1043# conn, addr = serv.accept()
1044# conn.send("1 Hola mundo\n")
1045# cantdata = 0
1046# while cantdata < 13:
1047# data = conn.recv(13-cantdata)
1048# cantdata += len(data)
1049# time.sleep(.3)
1050# conn.send("2 No more lines\n")
1051# conn.close()
1052# except socket.timeout:
1053# pass
1054# finally:
1055# serv.close()
1056# evt.set()
1057#
1058# class FTPWrapperTests(unittest.TestCase):
1059#
1060# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001061# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001062# ftplib.FTP.port = 9093
1063# self.evt = threading.Event()
1064# threading.Thread(target=server, args=(self.evt,)).start()
1065# time.sleep(.1)
1066#
1067# def tearDown(self):
1068# self.evt.wait()
1069#
1070# def testBasic(self):
1071# # connects
1072# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001073# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001074#
1075# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001076# # global default timeout is ignored
1077# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001078# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001079# socket.setdefaulttimeout(30)
1080# try:
1081# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1082# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001083# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001084# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001085# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001086#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001087# def testTimeoutDefault(self):
1088# # global default timeout is used
1089# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001090# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001091# socket.setdefaulttimeout(30)
1092# try:
1093# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1094# finally:
1095# socket.setdefaulttimeout(None)
1096# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1097# ftp.close()
1098#
1099# def testTimeoutValue(self):
1100# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1101# timeout=30)
1102# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1103# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001104
Skip Montanaro080c9972001-01-28 21:12:22 +00001105
1106
def test_main():
    """Run all the test case classes of this module via support.run_unittest()."""
    test_classes = [
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    ]
    support.run_unittest(*test_classes)
Brett Cannon74bfd702003-04-25 09:39:47 +00001121
1122
1123
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()