blob: 2b8852127bc7571edca5b18cb6cd5995622ba95f [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Georg Brandl5a650a22005-08-26 08:51:34 +000012import tempfile
Florent Xicluna99e472e2010-08-14 23:12:27 +000013import warnings
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # '%' followed by the upper-case hex code point, zero-padded to at
    # least two digits.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000021
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # Explicit proxies get a dedicated opener that is never cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        # Lazily create the shared opener on first use, then reuse it.
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
38
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        # The context manager closes the file even if the write fails.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # Close before removing: an open file cannot be deleted on every
        # platform.
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int,
                              "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        # __iter__ is looked up on the object itself on purpose; it is the
        # attribute test_interface() checks for.
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000117
class ProxyTests(unittest.TestCase):
    """Tests for proxy settings read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars; the names are collected first
        # so the environment is not modified while it is being scanned.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        no_proxy = urllib.request.getproxies_environment()['no']
        self.assertEqual('localhost', no_proxy)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000138
139
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The real class is saved in self._connection_class, and unfakehttp()
        is registered as a cleanup so the patch is undone after the test
        whether it passes or fails.
        """
        class FakeSocket(io.BytesIO):
            # Outgoing request data is discarded.
            def sendall(self, data): pass
            def makefile(self, *args, **kwds):
                return self
            def read(self, amt=None):
                if self.closed: return b""
                return io.BytesIO.read(self, amt)
            def readline(self, length=None):
                if self.closed: return b""
                return io.BytesIO.readline(self, length)
        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection
        self.addCleanup(self.unfakehttp)

    def unfakehttp(self):
        """Restore the http.client.HTTPConnection saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def test_read(self):
        self.fakehttp(b"Hello!")
        fp = urlopen("http://python.org/")
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), 'http://python.org/')
        self.assertEqual(fp.getcode(), 200)

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        self.assertRaises(IOError, urlopen, "http://python.org/")

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a 302 response redirects
        # to a file:// URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        self.assertRaises(urllib.error.HTTPError, urlopen,
                          "http://python.org/")

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        self.assertRaises(IOError, urlopen, "http://something")

    def test_userpass_inurl(self):
        self.fakehttp(b"Hello!")
        fp = urlopen("http://user:pass@python.org/")
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
        self.assertEqual(fp.getcode(), 200)
221
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            # Best effort: a registered file may never have been created.
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of filePath."""
        return "file://%s" % urllib.request.pathname2url(
            os.path.abspath(filePath))

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # count_holder's mutable default is deliberate: it carries the
        # expected call count from one hook invocation to the next.
        def hooktester(count, block_size, total_size, count_holder=[0]):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(count, block_size, total_size, _report=report):
            _report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []
        def hooktester(count, block_size, total_size, _report=report):
            _report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(count, block_size, total_size, _report=report):
            _report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000355
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                 "abcdefghijklmnopqrstuvwxyz",
                                 "0123456789",
                                 "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                        "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
        # The backslash is written as '\\' because '\^' is not a valid
        # escape sequence; the resulting string is unchanged.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127)) # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Actually exercise quote_plus() here; checking the quote() result a
        # second time would leave quote_plus() untested for this input.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                            encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                                    encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
558
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000559
Brett Cannon74bfd702003-04-25 09:39:47 +0000560class UnquotingTests(unittest.TestCase):
561 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000562
    See the doc string for QuotingTests for details on quoting and such.
564
565 """
566
567 def test_unquoting(self):
568 # Make sure unquoting of all ASCII values works
569 escape_list = []
570 for num in range(128):
571 given = hexescape(chr(num))
572 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000573 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000574 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000575 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000576 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000577 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000578 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000579 (expect, result))
580 escape_list.append(given)
581 escape_string = ''.join(escape_list)
582 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000583 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000584 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000585 "using unquote(): not all characters escaped: "
586 "%s" % result)
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000587 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
588 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna99e472e2010-08-14 23:12:27 +0000589 with warnings.catch_warnings():
590 warnings.simplefilter('ignore', BytesWarning)
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000591 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000592
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000593 def test_unquoting_badpercent(self):
594 # Test unquoting on bad percent-escapes
595 given = '%xab'
596 expect = given
597 result = urllib.parse.unquote(given)
598 self.assertEqual(expect, result, "using unquote(): %r != %r"
599 % (expect, result))
600 given = '%x'
601 expect = given
602 result = urllib.parse.unquote(given)
603 self.assertEqual(expect, result, "using unquote(): %r != %r"
604 % (expect, result))
605 given = '%'
606 expect = given
607 result = urllib.parse.unquote(given)
608 self.assertEqual(expect, result, "using unquote(): %r != %r"
609 % (expect, result))
610 # unquote_to_bytes
611 given = '%xab'
612 expect = bytes(given, 'ascii')
613 result = urllib.parse.unquote_to_bytes(given)
614 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
615 % (expect, result))
616 given = '%x'
617 expect = bytes(given, 'ascii')
618 result = urllib.parse.unquote_to_bytes(given)
619 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
620 % (expect, result))
621 given = '%'
622 expect = bytes(given, 'ascii')
623 result = urllib.parse.unquote_to_bytes(given)
624 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
625 % (expect, result))
Florent Xicluna37ddbb82010-08-14 21:06:29 +0000626 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
627 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000628
629 def test_unquoting_mixed_case(self):
630 # Test unquoting on mixed-case hex digits in the percent-escapes
631 given = '%Ab%eA'
632 expect = b'\xab\xea'
633 result = urllib.parse.unquote_to_bytes(given)
634 self.assertEqual(expect, result,
635 "using unquote_to_bytes(): %r != %r"
636 % (expect, result))
637
Brett Cannon74bfd702003-04-25 09:39:47 +0000638 def test_unquoting_parts(self):
639 # Make sure unquoting works when have non-quoted characters
640 # interspersed
641 given = 'ab%sd' % hexescape('c')
642 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000643 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000644 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000645 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000646 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000647 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000648 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000649
Brett Cannon74bfd702003-04-25 09:39:47 +0000650 def test_unquoting_plus(self):
651 # Test difference between unquote() and unquote_plus()
652 given = "are+there+spaces..."
653 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000654 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000655 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000656 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000657 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000658 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000659 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000660 "using unquote_plus(): %r != %r" % (expect, result))
661
662 def test_unquote_to_bytes(self):
663 given = 'br%C3%BCckner_sapporo_20050930.doc'
664 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
665 result = urllib.parse.unquote_to_bytes(given)
666 self.assertEqual(expect, result,
667 "using unquote_to_bytes(): %r != %r"
668 % (expect, result))
669 # Test on a string with unescaped non-ASCII characters
670 # (Technically an invalid URI; expect those characters to be UTF-8
671 # encoded).
672 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
673 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
674 self.assertEqual(expect, result,
675 "using unquote_to_bytes(): %r != %r"
676 % (expect, result))
677 # Test with a bytes as input
678 given = b'%A2%D8ab%FF'
679 expect = b'\xa2\xd8ab\xff'
680 result = urllib.parse.unquote_to_bytes(given)
681 self.assertEqual(expect, result,
682 "using unquote_to_bytes(): %r != %r"
683 % (expect, result))
684 # Test with a bytes as input, with unescaped non-ASCII bytes
685 # (Technically an invalid URI; expect those bytes to be preserved)
686 given = b'%A2\xd8ab%FF'
687 expect = b'\xa2\xd8ab\xff'
688 result = urllib.parse.unquote_to_bytes(given)
689 self.assertEqual(expect, result,
690 "using unquote_to_bytes(): %r != %r"
691 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000692
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000693 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000694 # Characters in the Latin-1 range, encoded with UTF-8
695 given = 'br%C3%BCckner_sapporo_20050930.doc'
696 expect = 'br\u00fcckner_sapporo_20050930.doc'
697 result = urllib.parse.unquote(given)
698 self.assertEqual(expect, result,
699 "using unquote(): %r != %r" % (expect, result))
700 # Characters in the Latin-1 range, encoded with None (default)
701 result = urllib.parse.unquote(given, encoding=None, errors=None)
702 self.assertEqual(expect, result,
703 "using unquote(): %r != %r" % (expect, result))
704
705 # Characters in the Latin-1 range, encoded with Latin-1
706 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
707 encoding="latin-1")
708 expect = 'br\u00fcckner_sapporo_20050930.doc'
709 self.assertEqual(expect, result,
710 "using unquote(): %r != %r" % (expect, result))
711
712 # Characters in BMP, encoded with UTF-8
713 given = "%E6%BC%A2%E5%AD%97"
714 expect = "\u6f22\u5b57" # "Kanji"
715 result = urllib.parse.unquote(given)
716 self.assertEqual(expect, result,
717 "using unquote(): %r != %r" % (expect, result))
718
719 # Decode with UTF-8, invalid sequence
720 given = "%F3%B1"
721 expect = "\ufffd" # Replacement character
722 result = urllib.parse.unquote(given)
723 self.assertEqual(expect, result,
724 "using unquote(): %r != %r" % (expect, result))
725
726 # Decode with UTF-8, invalid sequence, replace errors
727 result = urllib.parse.unquote(given, errors="replace")
728 self.assertEqual(expect, result,
729 "using unquote(): %r != %r" % (expect, result))
730
731 # Decode with UTF-8, invalid sequence, ignoring errors
732 given = "%F3%B1"
733 expect = ""
734 result = urllib.parse.unquote(given, errors="ignore")
735 self.assertEqual(expect, result,
736 "using unquote(): %r != %r" % (expect, result))
737
738 # A mix of non-ASCII and percent-encoded characters, UTF-8
739 result = urllib.parse.unquote("\u6f22%C3%BC")
740 expect = '\u6f22\u00fc'
741 self.assertEqual(expect, result,
742 "using unquote(): %r != %r" % (expect, result))
743
744 # A mix of non-ASCII and percent-encoded characters, Latin-1
745 # (Note, the string contains non-Latin-1-representable characters)
746 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
747 expect = '\u6f22\u00fc'
748 self.assertEqual(expect, result,
749 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000750
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input type; it is only
        used in the failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key digit.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is expected to be quoted as one str() value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element is expected as its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This check was missing, so the doseq/latin-1 result was computed
        # but never verified.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
944
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the URL back must give the original path again.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000987
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() on a range of 'user:password' strings.

        Some of the password examples are not sensible, but they are
        included to conform to RFC 2617 and to address issue4675.
        """
        # Passwords holding whitespace characters and an embedded ':'.
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            parsed = urllib.parse.splitpasswd('user:' + password)
            self.assertEqual(('user', password), parsed)
1002
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001003
class URLopener_Tests(unittest.TestCase):
    """Test case for the open method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up 'spam' scheme: hand the URL it is
            # given straight back so the test can inspect it.
            def open_spam(self, url):
                return url

        # A space in the URL is expected to come back as %20.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        untouched = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open("spam:" + untouched)
        self.assertEqual(opened, untouched)
1019
Guido van Rossume7ba4952007-06-06 23:52:48 +00001020# Just commented them out.
1021# Can't really tell why keep failing in windows and sparc.
Ezio Melotti13925002011-03-16 11:05:33 +02001022# Everywhere else they work ok, but on those machines, sometimes
Guido van Rossume7ba4952007-06-06 23:52:48 +00001023# fail in one of the tests, sometimes in other. I have a linux, and
1024# the tests go ok.
1025# If anybody has one of the problematic environments, please help!
1026# . Facundo
1027#
1028# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001029# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001030# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1031# serv.settimeout(3)
1032# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1033# serv.bind(("", 9093))
1034# serv.listen(5)
1035# try:
1036# conn, addr = serv.accept()
1037# conn.send("1 Hola mundo\n")
1038# cantdata = 0
1039# while cantdata < 13:
1040# data = conn.recv(13-cantdata)
1041# cantdata += len(data)
1042# time.sleep(.3)
1043# conn.send("2 No more lines\n")
1044# conn.close()
1045# except socket.timeout:
1046# pass
1047# finally:
1048# serv.close()
1049# evt.set()
1050#
1051# class FTPWrapperTests(unittest.TestCase):
1052#
1053# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001054# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001055# ftplib.FTP.port = 9093
1056# self.evt = threading.Event()
1057# threading.Thread(target=server, args=(self.evt,)).start()
1058# time.sleep(.1)
1059#
1060# def tearDown(self):
1061# self.evt.wait()
1062#
1063# def testBasic(self):
1064# # connects
1065# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001066# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001067#
1068# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001069# # global default timeout is ignored
1070# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001071# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001072# socket.setdefaulttimeout(30)
1073# try:
1074# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1075# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001076# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001077# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001078# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001079#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001080# def testTimeoutDefault(self):
1081# # global default timeout is used
1082# import socket
Georg Brandlab91fde2009-08-13 08:51:18 +00001083# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001084# socket.setdefaulttimeout(30)
1085# try:
1086# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1087# finally:
1088# socket.setdefaulttimeout(None)
1089# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1090# ftp.close()
1091#
1092# def testTimeoutValue(self):
1093# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1094# timeout=30)
1095# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1096# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001097
Skip Montanaro080c9972001-01-28 21:12:22 +00001098
1099
def test_main():
    """Pass the test case classes listed below to support.run_unittest()."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)



if __name__ == '__main__':
    test_main()