blob: 3003331416f496be9c207f5ad070eb68924feaac [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
Georg Brandl24420152008-05-26 16:32:26 +00005import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00006import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00007import io
Brett Cannon74bfd702003-04-25 09:39:47 +00008import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +00009from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000010import os
Georg Brandl5a650a22005-08-26 08:51:34 +000011import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000012
def hexescape(char):
    """Return *char* percent-escaped in the RFC 2396 style.

    The result is '%' followed by the character's code point written in
    upper-case hexadecimal and zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000019
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # Explicit proxies always get a dedicated, uncached opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        # Otherwise build the shared opener on first use and keep reusing it.
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    # Only forward *data* when the caller actually supplied it.
    call_args = (url,) if data is None else (url, data)
    return opener.open(*call_args)
35 return opener.open(url, data)
36
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Removal is registered as a cleanup rather than done in tearDown():
        # cleanups still run when a later step of setUp() raises, so the
        # file cannot be left behind if urlopen() below fails.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # tearDown() runs before the registered cleanups, so the object is
        # closed before the file underneath it is deleted.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator.  Collect everything it yields and compare the whole
        # list: a bare ``for`` loop with an assertion inside would pass
        # without checking anything if the iterator produced no lines.
        lines = list(self.returned_obj.__iter__())
        self.assertEqual(lines, [self.text])
Skip Montanaro080c9972001-01-28 21:12:22 +0000114
class ProxyTests(unittest.TestCase):
    """Check how proxy settings are read from environment variables."""

    def setUp(self):
        # The guard records every change made through it so that tearDown()
        # can put the environment back.
        self.env = support.EnvironmentVarGuard()
        # Snapshot the matching names first, then drop every proxy-related
        # variable so each test starts without any proxy configuration.
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # The expected key is the variable name lower-cased with the
        # '_proxy' suffix removed.
        self.assertEqual('localhost', found['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000135
136
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Make http.client hand out connections that replay *fakedata*.

        The genuine HTTPConnection class is remembered on
        ``self._connection_class`` so that unfakehttp() can reinstate it.
        """
        class FakeSocket(io.BytesIO):
            # Count of outstanding users of this buffer: makefile() adds
            # one, close() removes one, and the buffer is only really
            # closed when the count reaches zero.
            io_refs = 1

            def sendall(self, data):
                # Outgoing request bytes are simply discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                return b"" if self.closed else super().read(amt)

            def readline(self, length=None):
                return b"" if self.closed else super().readline(length)

            def close(self):
                self.io_refs -= 1
                if not self.io_refs:
                    super().close()

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                # No network: the "socket" is the canned response.
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Put back the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def _assert_hello_response(self, fakedata, url):
        # Serve *fakedata*, open *url*, and check body, EOF, URL and status.
        self.fakehttp(fakedata)
        try:
            fp = urlopen(url)
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def _assert_open_fails(self, fakedata, url):
        # Serve *fakedata* and check that opening *url* raises IOError.
        self.fakehttp(fakedata)
        try:
            self.assertRaises(IOError, urlopen, url)
        finally:
            self.unfakehttp()

    def check_read(self, ver):
        self._assert_hello_response(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!",
                                    'http://python.org/')

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self._assert_open_fails(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', "http://python.org/")

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self._assert_open_fails(b'', "http://something")

    def test_userpass_inurl(self):
        self._assert_hello_response(b"HTTP/1.0 200 OK\r\n\r\nHello!",
                                    'http://user:pass@python.org/')
220
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  This is deliberately best effort (a
        # registered file may never have been created), but only OSError is
        # ignored so that KeyboardInterrupt and programming errors are not
        # silently swallowed.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        The test is skipped when the absolute path cannot be encoded to UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails part-way through.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        seen = []

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            # Block counts must arrive as 0, 1, 2, ... in call order.
            self.assertEqual(count, len(seen))
            seen.append(count)

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)
        # Without this the test would pass even if the hook were ignored.
        self.assertTrue(seen, "reporthook was never called")

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000358
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Raw literal: the backslash is one of the characters under test and
        # "\^" is not a valid escape sequence in a normal string.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # quote_plus() has to be called for this check; previously the
        # quote() result above was simply compared a second time.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
561
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000562
Brett Cannon74bfd702003-04-25 09:39:47 +0000563class UnquotingTests(unittest.TestCase):
564 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000565
Brett Cannon74bfd702003-04-25 09:39:47 +0000566 See the doc string for quoting_Tests for details on quoting and such.
567
568 """
569
570 def test_unquoting(self):
571 # Make sure unquoting of all ASCII values works
572 escape_list = []
573 for num in range(128):
574 given = hexescape(chr(num))
575 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000576 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000577 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000578 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000579 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000580 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000581 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000582 (expect, result))
583 escape_list.append(given)
584 escape_string = ''.join(escape_list)
585 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000586 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000587 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000588 "using unquote(): not all characters escaped: "
589 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000590 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
591 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000592 with support.check_warnings(('', BytesWarning), quiet=True):
593 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000594
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000595 def test_unquoting_badpercent(self):
596 # Test unquoting on bad percent-escapes
597 given = '%xab'
598 expect = given
599 result = urllib.parse.unquote(given)
600 self.assertEqual(expect, result, "using unquote(): %r != %r"
601 % (expect, result))
602 given = '%x'
603 expect = given
604 result = urllib.parse.unquote(given)
605 self.assertEqual(expect, result, "using unquote(): %r != %r"
606 % (expect, result))
607 given = '%'
608 expect = given
609 result = urllib.parse.unquote(given)
610 self.assertEqual(expect, result, "using unquote(): %r != %r"
611 % (expect, result))
612 # unquote_to_bytes
613 given = '%xab'
614 expect = bytes(given, 'ascii')
615 result = urllib.parse.unquote_to_bytes(given)
616 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
617 % (expect, result))
618 given = '%x'
619 expect = bytes(given, 'ascii')
620 result = urllib.parse.unquote_to_bytes(given)
621 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
622 % (expect, result))
623 given = '%'
624 expect = bytes(given, 'ascii')
625 result = urllib.parse.unquote_to_bytes(given)
626 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
627 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000628 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
629 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000630
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000631 def test_unquoting_mixed_case(self):
632 # Test unquoting on mixed-case hex digits in the percent-escapes
633 given = '%Ab%eA'
634 expect = b'\xab\xea'
635 result = urllib.parse.unquote_to_bytes(given)
636 self.assertEqual(expect, result,
637 "using unquote_to_bytes(): %r != %r"
638 % (expect, result))
639
Brett Cannon74bfd702003-04-25 09:39:47 +0000640 def test_unquoting_parts(self):
641 # Make sure unquoting works when have non-quoted characters
642 # interspersed
643 given = 'ab%sd' % hexescape('c')
644 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000645 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000646 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000647 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000648 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000649 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000650 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000651
Brett Cannon74bfd702003-04-25 09:39:47 +0000652 def test_unquoting_plus(self):
653 # Test difference between unquote() and unquote_plus()
654 given = "are+there+spaces..."
655 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000656 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000657 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000658 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000659 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000660 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000661 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000662 "using unquote_plus(): %r != %r" % (expect, result))
663
664 def test_unquote_to_bytes(self):
665 given = 'br%C3%BCckner_sapporo_20050930.doc'
666 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
667 result = urllib.parse.unquote_to_bytes(given)
668 self.assertEqual(expect, result,
669 "using unquote_to_bytes(): %r != %r"
670 % (expect, result))
671 # Test on a string with unescaped non-ASCII characters
672 # (Technically an invalid URI; expect those characters to be UTF-8
673 # encoded).
674 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
675 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
676 self.assertEqual(expect, result,
677 "using unquote_to_bytes(): %r != %r"
678 % (expect, result))
679 # Test with a bytes as input
680 given = b'%A2%D8ab%FF'
681 expect = b'\xa2\xd8ab\xff'
682 result = urllib.parse.unquote_to_bytes(given)
683 self.assertEqual(expect, result,
684 "using unquote_to_bytes(): %r != %r"
685 % (expect, result))
686 # Test with a bytes as input, with unescaped non-ASCII bytes
687 # (Technically an invalid URI; expect those bytes to be preserved)
688 given = b'%A2\xd8ab%FF'
689 expect = b'\xa2\xd8ab\xff'
690 result = urllib.parse.unquote_to_bytes(given)
691 self.assertEqual(expect, result,
692 "using unquote_to_bytes(): %r != %r"
693 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000694
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000695 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000696 # Characters in the Latin-1 range, encoded with UTF-8
697 given = 'br%C3%BCckner_sapporo_20050930.doc'
698 expect = 'br\u00fcckner_sapporo_20050930.doc'
699 result = urllib.parse.unquote(given)
700 self.assertEqual(expect, result,
701 "using unquote(): %r != %r" % (expect, result))
702 # Characters in the Latin-1 range, encoded with None (default)
703 result = urllib.parse.unquote(given, encoding=None, errors=None)
704 self.assertEqual(expect, result,
705 "using unquote(): %r != %r" % (expect, result))
706
707 # Characters in the Latin-1 range, encoded with Latin-1
708 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
709 encoding="latin-1")
710 expect = 'br\u00fcckner_sapporo_20050930.doc'
711 self.assertEqual(expect, result,
712 "using unquote(): %r != %r" % (expect, result))
713
714 # Characters in BMP, encoded with UTF-8
715 given = "%E6%BC%A2%E5%AD%97"
716 expect = "\u6f22\u5b57" # "Kanji"
717 result = urllib.parse.unquote(given)
718 self.assertEqual(expect, result,
719 "using unquote(): %r != %r" % (expect, result))
720
721 # Decode with UTF-8, invalid sequence
722 given = "%F3%B1"
723 expect = "\ufffd" # Replacement character
724 result = urllib.parse.unquote(given)
725 self.assertEqual(expect, result,
726 "using unquote(): %r != %r" % (expect, result))
727
728 # Decode with UTF-8, invalid sequence, replace errors
729 result = urllib.parse.unquote(given, errors="replace")
730 self.assertEqual(expect, result,
731 "using unquote(): %r != %r" % (expect, result))
732
733 # Decode with UTF-8, invalid sequence, ignoring errors
734 given = "%F3%B1"
735 expect = ""
736 result = urllib.parse.unquote(given, errors="ignore")
737 self.assertEqual(expect, result,
738 "using unquote(): %r != %r" % (expect, result))
739
740 # A mix of non-ASCII and percent-encoded characters, UTF-8
741 result = urllib.parse.unquote("\u6f22%C3%BC")
742 expect = '\u6f22\u00fc'
743 self.assertEqual(expect, result,
744 "using unquote(): %r != %r" % (expect, result))
745
746 # A mix of non-ASCII and percent-encoded characters, Latin-1
747 # (Note, the string contains non-Latin-1-representable characters)
748 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
749 expect = '\u6f22\u00fc'
750 self.assertEqual(expect, result,
751 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000752
Brett Cannon74bfd702003-04-25 09:39:47 +0000753class urlencode_Tests(unittest.TestCase):
754 """Tests for urlencode()"""
755
756 def help_inputtype(self, given, test_type):
757 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000758
Brett Cannon74bfd702003-04-25 09:39:47 +0000759 'given' must lead to only the pairs:
760 * 1st, 1
761 * 2nd, 2
762 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000763
Brett Cannon74bfd702003-04-25 09:39:47 +0000764 Test cannot assume anything about order. Docs make no guarantee and
765 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000766
Brett Cannon74bfd702003-04-25 09:39:47 +0000767 """
768 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000769 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000770 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000771 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000772 "testing %s: %s not found in %s" %
773 (test_type, expected, result))
774 self.assertEqual(result.count('&'), 2,
775 "testing %s: expected 2 '&'s; got %s" %
776 (test_type, result.count('&')))
777 amp_location = result.index('&')
778 on_amp_left = result[amp_location - 1]
779 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000780 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000781 "testing %s: '&' not located in proper place in %s" %
782 (test_type, result))
783 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
784 "testing %s: "
785 "unexpected number of characters: %s != %s" %
786 (test_type, len(result), (5 * 3) + 2))
787
788 def test_using_mapping(self):
789 # Test passing in a mapping object as an argument.
790 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
791 "using dict as input type")
792
793 def test_using_sequence(self):
794 # Test passing in a sequence of two-item sequences as an argument.
795 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
796 "using sequence of two-item tuples as input")
797
798 def test_quoting(self):
799 # Make sure keys and values are quoted using quote_plus()
800 given = {"&":"="}
801 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000802 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000803 self.assertEqual(expect, result)
804 given = {"key name":"A bunch of pluses"}
805 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000806 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000807 self.assertEqual(expect, result)
808
809 def test_doseq(self):
810 # Test that passing True for 'doseq' parameter works correctly
811 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000812 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
813 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000814 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000815 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 for value in given["sequence"]:
817 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000818 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000819 self.assertEqual(result.count('&'), 2,
820 "Expected 2 '&'s, got %s" % result.count('&'))
821
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000822 def test_empty_sequence(self):
823 self.assertEqual("", urllib.parse.urlencode({}))
824 self.assertEqual("", urllib.parse.urlencode([]))
825
826 def test_nonstring_values(self):
827 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
828 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
829
830 def test_nonstring_seq_values(self):
831 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
832 self.assertEqual("a=None&a=a",
833 urllib.parse.urlencode({"a": [None, "a"]}, True))
834 self.assertEqual("a=a&a=b",
835 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
836
Senthil Kumarandf022da2010-07-03 17:48:22 +0000837 def test_urlencode_encoding(self):
838 # ASCII encoding. Expect %3F with errors="replace'
839 given = (('\u00a0', '\u00c1'),)
840 expect = '%3F=%3F'
841 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
842 self.assertEqual(expect, result)
843
844 # Default is UTF-8 encoding.
845 given = (('\u00a0', '\u00c1'),)
846 expect = '%C2%A0=%C3%81'
847 result = urllib.parse.urlencode(given)
848 self.assertEqual(expect, result)
849
850 # Latin-1 encoding.
851 given = (('\u00a0', '\u00c1'),)
852 expect = '%A0=%C1'
853 result = urllib.parse.urlencode(given, encoding="latin-1")
854 self.assertEqual(expect, result)
855
856 def test_urlencode_encoding_doseq(self):
857 # ASCII Encoding. Expect %3F with errors="replace'
858 given = (('\u00a0', '\u00c1'),)
859 expect = '%3F=%3F'
860 result = urllib.parse.urlencode(given, doseq=True,
861 encoding="ASCII", errors="replace")
862 self.assertEqual(expect, result)
863
864 # ASCII Encoding. On a sequence of values.
865 given = (("\u00a0", (1, "\u00c1")),)
866 expect = '%3F=1&%3F=%3F'
867 result = urllib.parse.urlencode(given, True,
868 encoding="ASCII", errors="replace")
869 self.assertEqual(expect, result)
870
871 # Utf-8
872 given = (("\u00a0", "\u00c1"),)
873 expect = '%C2%A0=%C3%81'
874 result = urllib.parse.urlencode(given, True)
875 self.assertEqual(expect, result)
876
877 given = (("\u00a0", (42, "\u00c1")),)
878 expect = '%C2%A0=42&%C2%A0=%C3%81'
879 result = urllib.parse.urlencode(given, True)
880 self.assertEqual(expect, result)
881
882 # latin-1
883 given = (("\u00a0", "\u00c1"),)
884 expect = '%A0=%C1'
885 result = urllib.parse.urlencode(given, True, encoding="latin-1")
886 self.assertEqual(expect, result)
887
888 given = (("\u00a0", (42, "\u00c1")),)
889 expect = '%A0=42&%A0=%C1'
890 result = urllib.parse.urlencode(given, True, encoding="latin-1")
891 self.assertEqual(expect, result)
892
893 def test_urlencode_bytes(self):
894 given = ((b'\xa0\x24', b'\xc1\x24'),)
895 expect = '%A0%24=%C1%24'
896 result = urllib.parse.urlencode(given)
897 self.assertEqual(expect, result)
898 result = urllib.parse.urlencode(given, True)
899 self.assertEqual(expect, result)
900
901 # Sequence of values
902 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
903 expect = '%A0%24=42&%A0%24=%C1%24'
904 result = urllib.parse.urlencode(given, True)
905 self.assertEqual(expect, result)
906
907 def test_urlencode_encoding_safe_parameter(self):
908
909 # Send '$' (\x24) as safe character
910 # Default utf-8 encoding
911
912 given = ((b'\xa0\x24', b'\xc1\x24'),)
913 result = urllib.parse.urlencode(given, safe=":$")
914 expect = '%A0$=%C1$'
915 self.assertEqual(expect, result)
916
917 given = ((b'\xa0\x24', b'\xc1\x24'),)
918 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
919 expect = '%A0$=%C1$'
920 self.assertEqual(expect, result)
921
922 # Safe parameter in sequence
923 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
924 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
925 result = urllib.parse.urlencode(given, True, safe=":$")
926 self.assertEqual(expect, result)
927
928 # Test all above in latin-1 encoding
929
930 given = ((b'\xa0\x24', b'\xc1\x24'),)
931 result = urllib.parse.urlencode(given, safe=":$",
932 encoding="latin-1")
933 expect = '%A0$=%C1$'
934 self.assertEqual(expect, result)
935
936 given = ((b'\xa0\x24', b'\xc1\x24'),)
937 expect = '%A0$=%C1$'
938 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
939 encoding="latin-1")
940
941 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
942 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
943 result = urllib.parse.urlencode(given, True, safe=":$",
944 encoding="latin-1")
945 self.assertEqual(expect, result)
946
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        """A plain relative path must convert to a URL path and back."""
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        """Path components must be quoted/unquoted during conversion."""
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The '+' must survive: url2pathname() is expected to leave it alone
        # rather than turn it into a space.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000989
class Utility_Tests(unittest.TestCase):
    """Exercise the assorted utility functions exposed by urllib."""

    def test_splitpasswd(self):
        """Check that splitpasswd() splits at the first ':' only.

        Some of the passwords are not sensible; they are included to
        conform to RFC 2617 and to cover issue4675.
        """
        odd_passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for secret in odd_passwords:
            self.assertEqual(('user', secret),
                             urllib.parse.splitpasswd('user:' + secret))
1004
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001005
class URLopener_Tests(unittest.TestCase):
    """Checks for the open() method of the URLopener class."""

    def test_quoted_open(self):
        """open() must quote unsafe characters and leave safe ones alone."""
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: echo back what it is
            # given so the test can inspect it.
            def open_spam(self, url):
                return url

        # The space is expected to arrive percent-quoted.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        safe_url = "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open(safe_url)
        self.assertEqual(opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1021
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux box,
# and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1029#
1030# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001031# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001032# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1033# serv.settimeout(3)
1034# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1035# serv.bind(("", 9093))
1036# serv.listen(5)
1037# try:
1038# conn, addr = serv.accept()
1039# conn.send("1 Hola mundo\n")
1040# cantdata = 0
1041# while cantdata < 13:
1042# data = conn.recv(13-cantdata)
1043# cantdata += len(data)
1044# time.sleep(.3)
1045# conn.send("2 No more lines\n")
1046# conn.close()
1047# except socket.timeout:
1048# pass
1049# finally:
1050# serv.close()
1051# evt.set()
1052#
1053# class FTPWrapperTests(unittest.TestCase):
1054#
1055# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001056# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001057# ftplib.FTP.port = 9093
1058# self.evt = threading.Event()
1059# threading.Thread(target=server, args=(self.evt,)).start()
1060# time.sleep(.1)
1061#
1062# def tearDown(self):
1063# self.evt.wait()
1064#
1065# def testBasic(self):
1066# # connects
1067# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001068# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001069#
1070# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001071# # global default timeout is ignored
1072# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001073# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001074# socket.setdefaulttimeout(30)
1075# try:
1076# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1077# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001078# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001079# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001080# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001081#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001082# def testTimeoutDefault(self):
1083# # global default timeout is used
1084# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001085# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001086# socket.setdefaulttimeout(30)
1087# try:
1088# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1089# finally:
1090# socket.setdefaulttimeout(None)
1091# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1092# ftp.close()
1093#
1094# def testTimeoutValue(self):
1095# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1096# timeout=30)
1097# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1098# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001099
Skip Montanaro080c9972001-01-28 21:12:22 +00001100
1101
def test_main():
    """Run every enabled test case class of this module."""
    enabled_cases = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*enabled_cases)
Brett Cannon74bfd702003-04-25 09:39:47 +00001116
1117
1118
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()