blob: 4d3509ae7d2f1726910745f4a9a9f1a06ed39af8 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Florent Xicluna99e472e2010-08-14 23:12:27 +000014import warnings
Jeremy Hylton6102e292000-08-31 15:48:10 +000015
def hexescape(char):
    """Return the percent-escape of a single character (RFC 2396 style).

    The result is '%' followed by the character's code point written in
    upper-case hexadecimal, zero-padded to at least two digits.
    """
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000022
# Convenience wrapper so the tests can drive FancyURLopener directly.
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh FancyURLopener configured with those
    proxies is used for this call only.  Otherwise a single module-level
    opener is created on first use and reused afterwards.  *data* is only
    forwarded to the opener when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: build the shared opener and cache it.
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    call_args = (url,) if data is None else (url, data)
    return opener.open(*call_args)
39
class urlopen_FileTests(unittest.TestCase):
    """Exercise urlopen() against a temporary local file.

    Covering as much of the returned object's interface as possible here
    reduces how much of the suite has to rely on connecting to the Net.

    """

    def setUp(self):
        # Write a one-line fixture file, then open it through a file: URL.
        fixture_line = "test_urllib: %s\n" % self.__class__.__name__
        self.text = fixture_line.encode("ascii")
        with open(support.TESTFN, 'wb') as fixture:
            fixture.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and remove the fixture file."""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose every attribute
        # named below.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            present = hasattr(self.returned_obj, name)
            self.assertTrue(present,
                            "object returned by urlopen() lacks %s attribute" %
                            name)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        # The fixture holds a single line, so the next read hits EOF.
        second = self.returned_obj.readline()
        self.assertEqual(b'', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        all_lines = self.returned_obj.readlines()
        self.assertEqual(len(all_lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(all_lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertTrue(isinstance(descriptor, int),
                        "fileno() did not return an int")
        # Read straight from the descriptor to prove it refers to the file.
        raw = os.read(descriptor, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertTrue(isinstance(headers, email.message.Message))

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Iterate via __iter__() explicitly.  No need to count iterations:
        # the comparison fails the moment anything other than the single
        # fixture line is yielded.
        line_iterator = self.returned_obj.__iter__()
        for line in line_iterator:
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000118
class ProxyTests(unittest.TestCase):

    def setUp(self):
        # The guard records changes to env vars so tearDown can undo them.
        self.env = support.EnvironmentVarGuard()
        # Collect the proxy-related names first (unsetting mutates
        # os.environ), then remove each of them through the guard.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Leaving the guard restores all proxy related env vars.
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment() keys are lower-cased and truncated
        # (no '_proxy' suffix), hence 'no' rather than 'NO_PROXY'.
        self.assertEqual('localhost', found['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000139
140
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        # Replace http.client.HTTPConnection with a subclass whose connect()
        # installs an in-memory "socket" preloaded with *fakedata*.  The
        # real class is remembered so unfakehttp() can put it back.
        class FakeSocket(io.BytesIO):
            def sendall(self, data):
                # Outgoing request bytes are simply discarded.
                pass

            def makefile(self, *args, **kwds):
                return self

            def read(self, amt=None):
                # A closed buffer reports EOF instead of raising.
                return b"" if self.closed else io.BytesIO.read(self, amt)

            def readline(self, length=None):
                return b"" if self.closed else io.BytesIO.readline(self, length)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        # Undo fakehttp(): reinstate the saved connection class.
        http.client.HTTPConnection = self._connection_class

    def test_read(self):
        self.fakehttp(b"Hello!")
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        target = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b'Hello!')
        try:
            response = urllib.request.urlopen(target)
            self.assertEqual(response.geturl(), target)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        canned = (
            b"HTTP/1.1 401 Authentication Required\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n"
        )
        self.fakehttp(canned)
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file: URL must be refused with
        # an HTTPError.
        canned = (
            b"HTTP/1.1 302 Found\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Location: file://guidocomputer.athome.com:/python/license\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n"
        )
        self.fakehttp(canned)
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"Hello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()
232
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.  The context manager closes it on every
        # path and lets a failing open() surface as itself rather than as a
        # swallowed NameError from the cleanup code.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as fixture:
            fixture.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a file that
        # a test never created (or already removed) is not an error.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*."""
        return "file://%s" % urllib.request.pathname2url(
            os.path.abspath(filePath))

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so tearDown() attempts to delete it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertTrue(isinstance(result[1], email.message.Message),
                        "did not get a email.message.Message instance "
                        "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        calls = [0]  # number of hook invocations seen so far

        def hooktester(count, block_size, total_size):
            self.assertTrue(isinstance(count, int))
            self.assertTrue(isinstance(block_size, int))
            self.assertTrue(isinstance(total_size, int))
            # The hook's block counter must increase by one per call,
            # starting from zero.
            self.assertEqual(count, calls[0])
            calls[0] += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000366
class QuotingTests(unittest.TestCase):
    """Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is doubled so the literal contains exactly one
        # backslash without relying on an invalid '\^' escape sequence.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Actually exercise quote_plus() here; previously the quote() result
        # was checked a second time under a quote_plus() message.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
569
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000570
Brett Cannon74bfd702003-04-25 09:39:47 +0000571class UnquotingTests(unittest.TestCase):
572 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000573
    See the docstring for QuotingTests for details on quoting and such.
575
576 """
577
578 def test_unquoting(self):
579 # Make sure unquoting of all ASCII values works
580 escape_list = []
581 for num in range(128):
582 given = hexescape(chr(num))
583 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000584 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000585 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000586 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000587 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000588 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000589 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000590 (expect, result))
591 escape_list.append(given)
592 escape_string = ''.join(escape_list)
593 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000594 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000595 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000596 "using unquote(): not all characters escaped: "
597 "%s" % result)
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000598 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
599 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna99e472e2010-08-14 23:12:27 +0000600 with warnings.catch_warnings():
601 warnings.simplefilter('ignore', BytesWarning)
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000602 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000603
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000604 def test_unquoting_badpercent(self):
605 # Test unquoting on bad percent-escapes
606 given = '%xab'
607 expect = given
608 result = urllib.parse.unquote(given)
609 self.assertEqual(expect, result, "using unquote(): %r != %r"
610 % (expect, result))
611 given = '%x'
612 expect = given
613 result = urllib.parse.unquote(given)
614 self.assertEqual(expect, result, "using unquote(): %r != %r"
615 % (expect, result))
616 given = '%'
617 expect = given
618 result = urllib.parse.unquote(given)
619 self.assertEqual(expect, result, "using unquote(): %r != %r"
620 % (expect, result))
621 # unquote_to_bytes
622 given = '%xab'
623 expect = bytes(given, 'ascii')
624 result = urllib.parse.unquote_to_bytes(given)
625 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
626 % (expect, result))
627 given = '%x'
628 expect = bytes(given, 'ascii')
629 result = urllib.parse.unquote_to_bytes(given)
630 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
631 % (expect, result))
632 given = '%'
633 expect = bytes(given, 'ascii')
634 result = urllib.parse.unquote_to_bytes(given)
635 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
636 % (expect, result))
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000637 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
638 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000639
640 def test_unquoting_mixed_case(self):
641 # Test unquoting on mixed-case hex digits in the percent-escapes
642 given = '%Ab%eA'
643 expect = b'\xab\xea'
644 result = urllib.parse.unquote_to_bytes(given)
645 self.assertEqual(expect, result,
646 "using unquote_to_bytes(): %r != %r"
647 % (expect, result))
648
Brett Cannon74bfd702003-04-25 09:39:47 +0000649 def test_unquoting_parts(self):
650 # Make sure unquoting works when have non-quoted characters
651 # interspersed
652 given = 'ab%sd' % hexescape('c')
653 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000654 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000655 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000656 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000657 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000658 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000659 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000660
Brett Cannon74bfd702003-04-25 09:39:47 +0000661 def test_unquoting_plus(self):
662 # Test difference between unquote() and unquote_plus()
663 given = "are+there+spaces..."
664 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000665 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000666 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000667 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000668 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000669 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000670 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000671 "using unquote_plus(): %r != %r" % (expect, result))
672
673 def test_unquote_to_bytes(self):
674 given = 'br%C3%BCckner_sapporo_20050930.doc'
675 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
676 result = urllib.parse.unquote_to_bytes(given)
677 self.assertEqual(expect, result,
678 "using unquote_to_bytes(): %r != %r"
679 % (expect, result))
680 # Test on a string with unescaped non-ASCII characters
681 # (Technically an invalid URI; expect those characters to be UTF-8
682 # encoded).
683 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
684 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
685 self.assertEqual(expect, result,
686 "using unquote_to_bytes(): %r != %r"
687 % (expect, result))
688 # Test with a bytes as input
689 given = b'%A2%D8ab%FF'
690 expect = b'\xa2\xd8ab\xff'
691 result = urllib.parse.unquote_to_bytes(given)
692 self.assertEqual(expect, result,
693 "using unquote_to_bytes(): %r != %r"
694 % (expect, result))
695 # Test with a bytes as input, with unescaped non-ASCII bytes
696 # (Technically an invalid URI; expect those bytes to be preserved)
697 given = b'%A2\xd8ab%FF'
698 expect = b'\xa2\xd8ab\xff'
699 result = urllib.parse.unquote_to_bytes(given)
700 self.assertEqual(expect, result,
701 "using unquote_to_bytes(): %r != %r"
702 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000703
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000704 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000705 # Characters in the Latin-1 range, encoded with UTF-8
706 given = 'br%C3%BCckner_sapporo_20050930.doc'
707 expect = 'br\u00fcckner_sapporo_20050930.doc'
708 result = urllib.parse.unquote(given)
709 self.assertEqual(expect, result,
710 "using unquote(): %r != %r" % (expect, result))
711 # Characters in the Latin-1 range, encoded with None (default)
712 result = urllib.parse.unquote(given, encoding=None, errors=None)
713 self.assertEqual(expect, result,
714 "using unquote(): %r != %r" % (expect, result))
715
716 # Characters in the Latin-1 range, encoded with Latin-1
717 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
718 encoding="latin-1")
719 expect = 'br\u00fcckner_sapporo_20050930.doc'
720 self.assertEqual(expect, result,
721 "using unquote(): %r != %r" % (expect, result))
722
723 # Characters in BMP, encoded with UTF-8
724 given = "%E6%BC%A2%E5%AD%97"
725 expect = "\u6f22\u5b57" # "Kanji"
726 result = urllib.parse.unquote(given)
727 self.assertEqual(expect, result,
728 "using unquote(): %r != %r" % (expect, result))
729
730 # Decode with UTF-8, invalid sequence
731 given = "%F3%B1"
732 expect = "\ufffd" # Replacement character
733 result = urllib.parse.unquote(given)
734 self.assertEqual(expect, result,
735 "using unquote(): %r != %r" % (expect, result))
736
737 # Decode with UTF-8, invalid sequence, replace errors
738 result = urllib.parse.unquote(given, errors="replace")
739 self.assertEqual(expect, result,
740 "using unquote(): %r != %r" % (expect, result))
741
742 # Decode with UTF-8, invalid sequence, ignoring errors
743 given = "%F3%B1"
744 expect = ""
745 result = urllib.parse.unquote(given, errors="ignore")
746 self.assertEqual(expect, result,
747 "using unquote(): %r != %r" % (expect, result))
748
749 # A mix of non-ASCII and percent-encoded characters, UTF-8
750 result = urllib.parse.unquote("\u6f22%C3%BC")
751 expect = '\u6f22\u00fc'
752 self.assertEqual(expect, result,
753 "using unquote(): %r != %r" % (expect, result))
754
755 # A mix of non-ASCII and percent-encoded characters, Latin-1
756 # (Note, the string contains non-Latin-1-representable characters)
757 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
758 expect = '\u6f22\u00fc'
759 self.assertEqual(expect, result,
760 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000761
Brett Cannon74bfd702003-04-25 09:39:47 +0000762class urlencode_Tests(unittest.TestCase):
763 """Tests for urlencode()"""
764
765 def help_inputtype(self, given, test_type):
766 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000767
Brett Cannon74bfd702003-04-25 09:39:47 +0000768 'given' must lead to only the pairs:
769 * 1st, 1
770 * 2nd, 2
771 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000772
Brett Cannon74bfd702003-04-25 09:39:47 +0000773 Test cannot assume anything about order. Docs make no guarantee and
774 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000775
Brett Cannon74bfd702003-04-25 09:39:47 +0000776 """
777 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000778 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000779 for expected in expect_somewhere:
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000780 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000781 "testing %s: %s not found in %s" %
782 (test_type, expected, result))
783 self.assertEqual(result.count('&'), 2,
784 "testing %s: expected 2 '&'s; got %s" %
785 (test_type, result.count('&')))
786 amp_location = result.index('&')
787 on_amp_left = result[amp_location - 1]
788 on_amp_right = result[amp_location + 1]
Georg Brandlab91fde2009-08-13 08:51:18 +0000789 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000790 "testing %s: '&' not located in proper place in %s" %
791 (test_type, result))
792 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
793 "testing %s: "
794 "unexpected number of characters: %s != %s" %
795 (test_type, len(result), (5 * 3) + 2))
796
797 def test_using_mapping(self):
798 # Test passing in a mapping object as an argument.
799 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
800 "using dict as input type")
801
802 def test_using_sequence(self):
803 # Test passing in a sequence of two-item sequences as an argument.
804 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
805 "using sequence of two-item tuples as input")
806
807 def test_quoting(self):
808 # Make sure keys and values are quoted using quote_plus()
809 given = {"&":"="}
810 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000811 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000812 self.assertEqual(expect, result)
813 given = {"key name":"A bunch of pluses"}
814 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000815 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 self.assertEqual(expect, result)
817
818 def test_doseq(self):
819 # Test that passing True for 'doseq' parameter works correctly
820 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000821 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
822 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000823 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000824 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000825 for value in given["sequence"]:
826 expect = "sequence=%s" % value
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000827 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000828 self.assertEqual(result.count('&'), 2,
829 "Expected 2 '&'s, got %s" % result.count('&'))
830
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000831 def test_empty_sequence(self):
832 self.assertEqual("", urllib.parse.urlencode({}))
833 self.assertEqual("", urllib.parse.urlencode([]))
834
835 def test_nonstring_values(self):
836 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
837 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
838
839 def test_nonstring_seq_values(self):
840 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
841 self.assertEqual("a=None&a=a",
842 urllib.parse.urlencode({"a": [None, "a"]}, True))
843 self.assertEqual("a=a&a=b",
844 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
845
Senthil Kumaranfe1ad152010-07-03 17:55:41 +0000846 def test_urlencode_encoding(self):
847 # ASCII encoding. Expect %3F with errors="replace'
848 given = (('\u00a0', '\u00c1'),)
849 expect = '%3F=%3F'
850 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
851 self.assertEqual(expect, result)
852
853 # Default is UTF-8 encoding.
854 given = (('\u00a0', '\u00c1'),)
855 expect = '%C2%A0=%C3%81'
856 result = urllib.parse.urlencode(given)
857 self.assertEqual(expect, result)
858
859 # Latin-1 encoding.
860 given = (('\u00a0', '\u00c1'),)
861 expect = '%A0=%C1'
862 result = urllib.parse.urlencode(given, encoding="latin-1")
863 self.assertEqual(expect, result)
864
865 def test_urlencode_encoding_doseq(self):
866 # ASCII Encoding. Expect %3F with errors="replace'
867 given = (('\u00a0', '\u00c1'),)
868 expect = '%3F=%3F'
869 result = urllib.parse.urlencode(given, doseq=True,
870 encoding="ASCII", errors="replace")
871 self.assertEqual(expect, result)
872
873 # ASCII Encoding. On a sequence of values.
874 given = (("\u00a0", (1, "\u00c1")),)
875 expect = '%3F=1&%3F=%3F'
876 result = urllib.parse.urlencode(given, True,
877 encoding="ASCII", errors="replace")
878 self.assertEqual(expect, result)
879
880 # Utf-8
881 given = (("\u00a0", "\u00c1"),)
882 expect = '%C2%A0=%C3%81'
883 result = urllib.parse.urlencode(given, True)
884 self.assertEqual(expect, result)
885
886 given = (("\u00a0", (42, "\u00c1")),)
887 expect = '%C2%A0=42&%C2%A0=%C3%81'
888 result = urllib.parse.urlencode(given, True)
889 self.assertEqual(expect, result)
890
891 # latin-1
892 given = (("\u00a0", "\u00c1"),)
893 expect = '%A0=%C1'
894 result = urllib.parse.urlencode(given, True, encoding="latin-1")
895 self.assertEqual(expect, result)
896
897 given = (("\u00a0", (42, "\u00c1")),)
898 expect = '%A0=42&%A0=%C1'
899 result = urllib.parse.urlencode(given, True, encoding="latin-1")
900 self.assertEqual(expect, result)
901
902 def test_urlencode_bytes(self):
903 given = ((b'\xa0\x24', b'\xc1\x24'),)
904 expect = '%A0%24=%C1%24'
905 result = urllib.parse.urlencode(given)
906 self.assertEqual(expect, result)
907 result = urllib.parse.urlencode(given, True)
908 self.assertEqual(expect, result)
909
910 # Sequence of values
911 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
912 expect = '%A0%24=42&%A0%24=%C1%24'
913 result = urllib.parse.urlencode(given, True)
914 self.assertEqual(expect, result)
915
916 def test_urlencode_encoding_safe_parameter(self):
917
918 # Send '$' (\x24) as safe character
919 # Default utf-8 encoding
920
921 given = ((b'\xa0\x24', b'\xc1\x24'),)
922 result = urllib.parse.urlencode(given, safe=":$")
923 expect = '%A0$=%C1$'
924 self.assertEqual(expect, result)
925
926 given = ((b'\xa0\x24', b'\xc1\x24'),)
927 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
928 expect = '%A0$=%C1$'
929 self.assertEqual(expect, result)
930
931 # Safe parameter in sequence
932 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
933 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
934 result = urllib.parse.urlencode(given, True, safe=":$")
935 self.assertEqual(expect, result)
936
937 # Test all above in latin-1 encoding
938
939 given = ((b'\xa0\x24', b'\xc1\x24'),)
940 result = urllib.parse.urlencode(given, safe=":$",
941 encoding="latin-1")
942 expect = '%A0$=%C1$'
943 self.assertEqual(expect, result)
944
945 given = ((b'\xa0\x24', b'\xc1\x24'),)
946 expect = '%A0$=%C1$'
947 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
948 encoding="latin-1")
949
950 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
951 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
952 result = urllib.parse.urlencode(given, True, safe=":$",
953 encoding="latin-1")
954 self.assertEqual(expect, result)
955
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL must be preserved, not turned into a space.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive root must all map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1015
class Utility_Tests(unittest.TestCase):
    """Exercise the assorted utility functions that urllib provides."""

    def test_splitpasswd(self):
        """Check that splitpasswd() splits only at the first ':'.

        Several of the passwords are not sensible, but they are included
        to conform to RFC2617 and to address issue4675.
        """
        split = urllib.parse.splitpasswd
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            self.assertEqual(('user', password), split('user:' + password))
1030
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001031
class URLopener_Tests(unittest.TestCase):
    """Check what URLopener.open() hands to the scheme-specific opener."""

    def test_quoted_open(self):
        class SpamEcho(urllib.request.URLopener):
            # Handing the argument straight back lets the test inspect
            # exactly what open() passed in for a "spam:" URL.
            def open_spam(self, url):
                return url

        # A space inside the URL comes back percent-encoded.
        opened = SpamEcho().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        untouched = "c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = SpamEcho().open("spam://" + untouched)
        self.assertEqual(opened, "//" + untouched)
1047
# The tests below are simply commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1055#
1056# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001057# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001058# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1059# serv.settimeout(3)
1060# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1061# serv.bind(("", 9093))
1062# serv.listen(5)
1063# try:
1064# conn, addr = serv.accept()
1065# conn.send("1 Hola mundo\n")
1066# cantdata = 0
1067# while cantdata < 13:
1068# data = conn.recv(13-cantdata)
1069# cantdata += len(data)
1070# time.sleep(.3)
1071# conn.send("2 No more lines\n")
1072# conn.close()
1073# except socket.timeout:
1074# pass
1075# finally:
1076# serv.close()
1077# evt.set()
1078#
1079# class FTPWrapperTests(unittest.TestCase):
1080#
1081# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001082# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001083# ftplib.FTP.port = 9093
1084# self.evt = threading.Event()
1085# threading.Thread(target=server, args=(self.evt,)).start()
1086# time.sleep(.1)
1087#
1088# def tearDown(self):
1089# self.evt.wait()
1090#
1091# def testBasic(self):
1092# # connects
1093# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001094# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001095#
1096# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001097# # global default timeout is ignored
1098# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001099# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001100# socket.setdefaulttimeout(30)
1101# try:
1102# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1103# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001104# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001105# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001106# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001107#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001108# def testTimeoutDefault(self):
1109# # global default timeout is used
1110# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001111# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001112# socket.setdefaulttimeout(30)
1113# try:
1114# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1115# finally:
1116# socket.setdefaulttimeout(None)
1117# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1118# ftp.close()
1119#
1120# def testTimeoutValue(self):
1121# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1122# timeout=30)
1123# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1124# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001125
Skip Montanaro080c9972001-01-28 21:12:22 +00001126
1127
def test_main():
    """Hand every enabled test case class of this module to run_unittest()."""
    enabled_cases = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*enabled_cases)
Brett Cannon74bfd702003-04-25 09:39:47 +00001142
1143
1144
# Run the suite only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test_main()