blob: 482acc1c0f224d7fa1f52025ce3e039426053d63 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Florent Xicluna99e472e2010-08-14 23:12:27 +000014import warnings
Georg Brandl2daf6ae2012-02-20 19:54:16 +010015import collections
Jeremy Hylton6102e292000-08-31 15:48:10 +000016
def hexescape(char):
    """Return *char* percent-escaped in the form described by RFC 2396.

    The result is '%' followed by the upper-case hexadecimal value of the
    character's code point, zero-padded to at least two digits.
    """
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000023
# Shortcut for testing FancyURLopener.  The opener built on first use is
# cached here and shared by later calls that do not pass explicit proxies.
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh FancyURLopener configured with them is
    used for this call only and is not cached.  Otherwise the module-level
    opener is created on demand and reused.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    # Only forward *data* when the caller actually supplied it.
    call_args = (url,) if data is None else (url, data)
    return opener.open(*call_args)
40
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # tearDown() is not run when setUp() fails, so register the removal
        # as a cleanup before attempting to open the URL.  Cleanups run
        # after tearDown(), i.e. after the returned object has been closed.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int,
                              "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000119
class ProxyTests(unittest.TestCase):
    """Exercise urllib.request.getproxies_environment()."""

    def setUp(self):
        # The guard records changes to env vars so tearDown() can undo them
        self.env = support.EnvironmentVarGuard()
        # Start every test without any proxy related env vars
        proxy_vars = [name for name in list(os.environ)
                      if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment() uses lower-cased, truncated (no '_proxy')
        # keys
        self.assertEqual('localhost', found['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000140
141
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Swap http.client.HTTPConnection for a fake that serves *fakedata*.

        The fake connection's "socket" is an in-memory bytes buffer holding
        *fakedata*.  The real class is remembered on the test instance so
        that unfakehttp() can put it back.
        """
        class FakeSocket(io.BytesIO):
            def sendall(self, data):
                # Outgoing request bytes are simply discarded.
                pass
            def makefile(self, *args, **kwds):
                return self
            def read(self, amt=None):
                if self.closed:
                    return b""
                return io.BytesIO.read(self, amt)
            def readline(self, length=None):
                if self.closed:
                    return b""
                return io.BytesIO.readline(self, length)
        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def test_read(self):
        self.fakehttp(b"Hello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b'Hello!')
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when the server redirects to a
        # file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"Hello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()
233
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created or may already be gone.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*."""
        return "file://%s" % urllib.request.pathname2url(
            os.path.abspath(filePath))

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # The hook is expected to be called with count 0, 1, 2, ...; the next
        # expected value is kept in a one-element list the closure can update.
        expected_count = [0]
        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count[0])
            expected_count[0] += 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []
        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000367
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written as an explicit escape; a bare "\^" is an
        # invalid escape sequence.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Actually exercise quote_plus() rather than re-checking the value
        # returned by quote() above.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
570
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000571
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the doc string for QuotingTests for details on quoting and such.

    """

    # Exceptions accepted when a non-string argument is passed in; the tests
    # do not depend on which of the two is actually raised.
    _bad_argument = (TypeError, AttributeError)

    def _check(self, func_name, expect, result):
        # Assert that 'result' equals 'expect'.  'func_name' is the name of
        # the urllib.parse function that produced 'result'; it is only used
        # to build the failure message.
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (func_name, expect, result))

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            self._check("unquote", expect, urllib.parse.unquote(given))
            self._check("unquote_plus", expect,
                        urllib.parse.unquote_plus(given))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # The only '%' allowed to survive is the one decoded from '%25'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises(self._bad_argument, urllib.parse.unquote, None)
        self.assertRaises(self._bad_argument, urllib.parse.unquote, ())
        with warnings.catch_warnings():
            # Ignore any BytesWarning raised while the bytes argument is
            # being rejected; only the exception matters here.
            warnings.simplefilter('ignore', BytesWarning)
            self.assertRaises(self._bad_argument, urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are expected to be
        # passed through unchanged.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            self._check("unquote", given, urllib.parse.unquote(given))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            self._check("unquote_to_bytes", expect,
                        urllib.parse.unquote_to_bytes(given))
        self.assertRaises(self._bad_argument,
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises(self._bad_argument,
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        self._check("unquote_to_bytes", expect,
                    urllib.parse.unquote_to_bytes(given))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        self._check("unquote", expect, urllib.parse.unquote(given))
        self._check("unquote_plus", expect, urllib.parse.unquote_plus(given))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        # unquote() leaves '+' alone ...
        self._check("unquote", given, urllib.parse.unquote(given))
        # ... while unquote_plus() turns every '+' into a space.
        expect = given.replace('+', ' ')
        self._check("unquote_plus", expect, urllib.parse.unquote_plus(given))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        self._check("unquote_to_bytes", expect,
                    urllib.parse.unquote_to_bytes(given))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self._check("unquote_to_bytes", expect, result)
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        self._check("unquote_to_bytes", expect,
                    urllib.parse.unquote_to_bytes(given))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        self._check("unquote_to_bytes", expect,
                    urllib.parse.unquote_to_bytes(given))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check("unquote", expect, urllib.parse.unquote(given))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self._check("unquote", expect, result)

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check("unquote", expect, result)

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        self._check("unquote", expect, urllib.parse.unquote(given))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        self._check("unquote", expect, urllib.parse.unquote(given))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self._check("unquote", expect, result)

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self._check("unquote", expect, result)

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self._check("unquote", expect, result)

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self._check("unquote", expect, result)
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000762
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' describes the kind of input being tested; it is only
        used in failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every value is a single digit and every key starts with one, so
        # the first '&' must sit between two digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is quoted as its str() representation.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element gets its own 'sequence=...' pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # An ordered mapping used as a value contributes its keys, in order.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This check used to be missing, so the doseq/latin-1 result was
        # computed but never verified.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
957
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the URL back must give the original path again.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.request.url2pathname '
                         'function.')
    def test_ntpath(self):
        # Every spelling of the drive root must map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1017
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Some of the example passwords are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # Whatever follows the first ':' must come back unchanged as the
        # password, including whitespace characters and further colons.
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            userinfo = 'user:' + password
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd(userinfo))
1032
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001033
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # Opener whose handler for the made-up 'spam' scheme simply hands
        # back the URL it was given, so the test can inspect it.
        class EchoingOpener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        # A space in the URL is expected to arrive quoted as %20.
        opened = EchoingOpener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        opened = EchoingOpener().open(
            "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
        self.assertEqual(opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1049
# The FTPWrapperTests below are commented out rather than deleted.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1057#
1058# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001059# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001060# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1061# serv.settimeout(3)
1062# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1063# serv.bind(("", 9093))
1064# serv.listen(5)
1065# try:
1066# conn, addr = serv.accept()
1067# conn.send("1 Hola mundo\n")
1068# cantdata = 0
1069# while cantdata < 13:
1070# data = conn.recv(13-cantdata)
1071# cantdata += len(data)
1072# time.sleep(.3)
1073# conn.send("2 No more lines\n")
1074# conn.close()
1075# except socket.timeout:
1076# pass
1077# finally:
1078# serv.close()
1079# evt.set()
1080#
1081# class FTPWrapperTests(unittest.TestCase):
1082#
1083# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001084# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001085# ftplib.FTP.port = 9093
1086# self.evt = threading.Event()
1087# threading.Thread(target=server, args=(self.evt,)).start()
1088# time.sleep(.1)
1089#
1090# def tearDown(self):
1091# self.evt.wait()
1092#
1093# def testBasic(self):
1094# # connects
1095# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001096# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001097#
1098# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001099# # global default timeout is ignored
1100# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001101# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001102# socket.setdefaulttimeout(30)
1103# try:
1104# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1105# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001106# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001107# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001108# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001109#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001110# def testTimeoutDefault(self):
1111# # global default timeout is used
1112# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001113# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001114# socket.setdefaulttimeout(30)
1115# try:
1116# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1117# finally:
1118# socket.setdefaulttimeout(None)
1119# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1120# ftp.close()
1121#
1122# def testTimeoutValue(self):
1123# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1124# timeout=30)
1125# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1126# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001127
Skip Montanaro080c9972001-01-28 21:12:22 +00001128
1129
def test_main():
    # Hand every test case class of this module to test.support's runner.
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)



if __name__ == '__main__':
    test_main()