blob: 2775a132ef4855e8ee8968b51420912d7d958de8 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return ``'%'`` followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits, e.g. ``' '`` becomes
    ``'%20'`` and ``'\\n'`` becomes ``'%0A'``.
    """
    # "%02X" yields upper-case hex digits and pads single-digit values with a
    # leading zero; wider values are emitted unchanged.
    return "%%%02X" % ord(char)

# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a ``urllib.request.FancyURLopener``.

    When *proxies* is given, a fresh opener configured with that mapping is
    used for this call only.  Otherwise a single opener is created on first
    use, stored in the module-level ``_urlopener`` and reused by later calls.
    *data*, when not None, is forwarded to the opener's ``open()`` method.
    """
    global _urlopener
    if proxies is not None:
        # Proxy-specific openers are never cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)

class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Register the removal before calling urlopen(): if opening fails,
        # tearDown() is never run, but cleanups still are.  Cleanups run
        # after tearDown(), so the URL object is closed before the file is
        # deleted.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)

class ProxyTests(unittest.TestCase):
    """Test proxy discovery from environment variables.

    Each test starts with every environment variable whose name contains
    "proxy" (case-insensitively) unset; the original environment is
    restored afterwards.
    """

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  The names are collected first
        # because unsetting a variable modifies os.environ.
        proxy_vars = [name for name in list(os.environ)
                      if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])


class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a canned-response fake.

        After this call, every new connection "receives" the bytes in
        *fakedata* instead of talking to the network.  The real class is
        saved on the test instance so that unfakehttp() can restore it.
        """
        class FakeSocket(io.BytesIO):
            # Reference count shared by the socket and the file objects
            # handed out by makefile(); the buffer is really closed only
            # once every holder has called close().
            io_refs = 1

            def sendall(self, data):
                # Outgoing request data is discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return io.BytesIO.read(self, amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return io.BytesIO.readline(self, length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    io.BytesIO.close(self)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class

    def check_read(self, ver):
        """Check a minimal 200 response with HTTP version *ver* (bytes)."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a 302 response redirects to
        # a file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(IOError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created, or may already be gone.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, made absolute first.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register first so the file is deleted even if writing fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Schedule *fileName* for deletion in tearDown()."""
        self.tempFiles.append(fileName)

    def _retrieveWithReports(self, data=b""):
        """Retrieve a new temp file holding *data* into support.TESTFN.

        Returns the list of (count, block_size, total_size) tuples that
        urlretrieve() passed to its reporthook, in call order.
        """
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(data)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_count = 0

        def hooktester(count, block_size, total_size):
            # Each call must carry the next consecutive count, starting at 0.
            nonlocal expected_count
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count)
            expected_count += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReports()
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = self._retrieveWithReports(b"x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReports(b"x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)

class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789"
                        "_.-")
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Raw string: the backslash is one of the characters under test.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        # Check quote_plus() itself rather than re-checking quote()'s result.
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))


Brett Cannon74bfd702003-04-25 09:39:47 +0000598class UnquotingTests(unittest.TestCase):
599 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000600
Brett Cannon74bfd702003-04-25 09:39:47 +0000601 See the doc string for quoting_Tests for details on quoting and such.
602
603 """
604
605 def test_unquoting(self):
606 # Make sure unquoting of all ASCII values works
607 escape_list = []
608 for num in range(128):
609 given = hexescape(chr(num))
610 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000611 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000612 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000613 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000614 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000615 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000616 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000617 (expect, result))
618 escape_list.append(given)
619 escape_string = ''.join(escape_list)
620 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000621 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000622 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000623 "using unquote(): not all characters escaped: "
624 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000625 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
626 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000627 with support.check_warnings(('', BytesWarning), quiet=True):
628 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000629
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000630 def test_unquoting_badpercent(self):
631 # Test unquoting on bad percent-escapes
632 given = '%xab'
633 expect = given
634 result = urllib.parse.unquote(given)
635 self.assertEqual(expect, result, "using unquote(): %r != %r"
636 % (expect, result))
637 given = '%x'
638 expect = given
639 result = urllib.parse.unquote(given)
640 self.assertEqual(expect, result, "using unquote(): %r != %r"
641 % (expect, result))
642 given = '%'
643 expect = given
644 result = urllib.parse.unquote(given)
645 self.assertEqual(expect, result, "using unquote(): %r != %r"
646 % (expect, result))
647 # unquote_to_bytes
648 given = '%xab'
649 expect = bytes(given, 'ascii')
650 result = urllib.parse.unquote_to_bytes(given)
651 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
652 % (expect, result))
653 given = '%x'
654 expect = bytes(given, 'ascii')
655 result = urllib.parse.unquote_to_bytes(given)
656 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
657 % (expect, result))
658 given = '%'
659 expect = bytes(given, 'ascii')
660 result = urllib.parse.unquote_to_bytes(given)
661 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
662 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000663 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
664 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000665
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000666 def test_unquoting_mixed_case(self):
667 # Test unquoting on mixed-case hex digits in the percent-escapes
668 given = '%Ab%eA'
669 expect = b'\xab\xea'
670 result = urllib.parse.unquote_to_bytes(given)
671 self.assertEqual(expect, result,
672 "using unquote_to_bytes(): %r != %r"
673 % (expect, result))
674
Brett Cannon74bfd702003-04-25 09:39:47 +0000675 def test_unquoting_parts(self):
676 # Make sure unquoting works when have non-quoted characters
677 # interspersed
678 given = 'ab%sd' % hexescape('c')
679 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000680 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000681 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000682 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000683 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000684 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000685 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000686
Brett Cannon74bfd702003-04-25 09:39:47 +0000687 def test_unquoting_plus(self):
688 # Test difference between unquote() and unquote_plus()
689 given = "are+there+spaces..."
690 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000691 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000692 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000693 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000694 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000695 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000696 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000697 "using unquote_plus(): %r != %r" % (expect, result))
698
699 def test_unquote_to_bytes(self):
700 given = 'br%C3%BCckner_sapporo_20050930.doc'
701 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
702 result = urllib.parse.unquote_to_bytes(given)
703 self.assertEqual(expect, result,
704 "using unquote_to_bytes(): %r != %r"
705 % (expect, result))
706 # Test on a string with unescaped non-ASCII characters
707 # (Technically an invalid URI; expect those characters to be UTF-8
708 # encoded).
709 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
710 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
711 self.assertEqual(expect, result,
712 "using unquote_to_bytes(): %r != %r"
713 % (expect, result))
714 # Test with a bytes as input
715 given = b'%A2%D8ab%FF'
716 expect = b'\xa2\xd8ab\xff'
717 result = urllib.parse.unquote_to_bytes(given)
718 self.assertEqual(expect, result,
719 "using unquote_to_bytes(): %r != %r"
720 % (expect, result))
721 # Test with a bytes as input, with unescaped non-ASCII bytes
722 # (Technically an invalid URI; expect those bytes to be preserved)
723 given = b'%A2\xd8ab%FF'
724 expect = b'\xa2\xd8ab\xff'
725 result = urllib.parse.unquote_to_bytes(given)
726 self.assertEqual(expect, result,
727 "using unquote_to_bytes(): %r != %r"
728 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000729
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000730 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000731 # Characters in the Latin-1 range, encoded with UTF-8
732 given = 'br%C3%BCckner_sapporo_20050930.doc'
733 expect = 'br\u00fcckner_sapporo_20050930.doc'
734 result = urllib.parse.unquote(given)
735 self.assertEqual(expect, result,
736 "using unquote(): %r != %r" % (expect, result))
737 # Characters in the Latin-1 range, encoded with None (default)
738 result = urllib.parse.unquote(given, encoding=None, errors=None)
739 self.assertEqual(expect, result,
740 "using unquote(): %r != %r" % (expect, result))
741
742 # Characters in the Latin-1 range, encoded with Latin-1
743 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
744 encoding="latin-1")
745 expect = 'br\u00fcckner_sapporo_20050930.doc'
746 self.assertEqual(expect, result,
747 "using unquote(): %r != %r" % (expect, result))
748
749 # Characters in BMP, encoded with UTF-8
750 given = "%E6%BC%A2%E5%AD%97"
751 expect = "\u6f22\u5b57" # "Kanji"
752 result = urllib.parse.unquote(given)
753 self.assertEqual(expect, result,
754 "using unquote(): %r != %r" % (expect, result))
755
756 # Decode with UTF-8, invalid sequence
757 given = "%F3%B1"
758 expect = "\ufffd" # Replacement character
759 result = urllib.parse.unquote(given)
760 self.assertEqual(expect, result,
761 "using unquote(): %r != %r" % (expect, result))
762
763 # Decode with UTF-8, invalid sequence, replace errors
764 result = urllib.parse.unquote(given, errors="replace")
765 self.assertEqual(expect, result,
766 "using unquote(): %r != %r" % (expect, result))
767
768 # Decode with UTF-8, invalid sequence, ignoring errors
769 given = "%F3%B1"
770 expect = ""
771 result = urllib.parse.unquote(given, errors="ignore")
772 self.assertEqual(expect, result,
773 "using unquote(): %r != %r" % (expect, result))
774
775 # A mix of non-ASCII and percent-encoded characters, UTF-8
776 result = urllib.parse.unquote("\u6f22%C3%BC")
777 expect = '\u6f22\u00fc'
778 self.assertEqual(expect, result,
779 "using unquote(): %r != %r" % (expect, result))
780
781 # A mix of non-ASCII and percent-encoded characters, Latin-1
782 # (Note, the string contains non-Latin-1-representable characters)
783 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
784 expect = '\u6f22\u00fc'
785 self.assertEqual(expect, result,
786 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000787
Brett Cannon74bfd702003-04-25 09:39:47 +0000788class urlencode_Tests(unittest.TestCase):
789 """Tests for urlencode()"""
790
791 def help_inputtype(self, given, test_type):
792 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000793
Brett Cannon74bfd702003-04-25 09:39:47 +0000794 'given' must lead to only the pairs:
795 * 1st, 1
796 * 2nd, 2
797 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000798
Brett Cannon74bfd702003-04-25 09:39:47 +0000799 Test cannot assume anything about order. Docs make no guarantee and
800 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000801
Brett Cannon74bfd702003-04-25 09:39:47 +0000802 """
803 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000804 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000805 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000806 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000807 "testing %s: %s not found in %s" %
808 (test_type, expected, result))
809 self.assertEqual(result.count('&'), 2,
810 "testing %s: expected 2 '&'s; got %s" %
811 (test_type, result.count('&')))
812 amp_location = result.index('&')
813 on_amp_left = result[amp_location - 1]
814 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000815 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 "testing %s: '&' not located in proper place in %s" %
817 (test_type, result))
818 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
819 "testing %s: "
820 "unexpected number of characters: %s != %s" %
821 (test_type, len(result), (5 * 3) + 2))
822
823 def test_using_mapping(self):
824 # Test passing in a mapping object as an argument.
825 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
826 "using dict as input type")
827
828 def test_using_sequence(self):
829 # Test passing in a sequence of two-item sequences as an argument.
830 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
831 "using sequence of two-item tuples as input")
832
833 def test_quoting(self):
834 # Make sure keys and values are quoted using quote_plus()
835 given = {"&":"="}
836 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000837 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000838 self.assertEqual(expect, result)
839 given = {"key name":"A bunch of pluses"}
840 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000841 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000842 self.assertEqual(expect, result)
843
844 def test_doseq(self):
845 # Test that passing True for 'doseq' parameter works correctly
846 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000847 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
848 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000849 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000850 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000851 for value in given["sequence"]:
852 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000853 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000854 self.assertEqual(result.count('&'), 2,
855 "Expected 2 '&'s, got %s" % result.count('&'))
856
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000857 def test_empty_sequence(self):
858 self.assertEqual("", urllib.parse.urlencode({}))
859 self.assertEqual("", urllib.parse.urlencode([]))
860
861 def test_nonstring_values(self):
862 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
863 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
864
865 def test_nonstring_seq_values(self):
866 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
867 self.assertEqual("a=None&a=a",
868 urllib.parse.urlencode({"a": [None, "a"]}, True))
869 self.assertEqual("a=a&a=b",
870 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
871
Senthil Kumarandf022da2010-07-03 17:48:22 +0000872 def test_urlencode_encoding(self):
873 # ASCII encoding. Expect %3F with errors="replace'
874 given = (('\u00a0', '\u00c1'),)
875 expect = '%3F=%3F'
876 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
877 self.assertEqual(expect, result)
878
879 # Default is UTF-8 encoding.
880 given = (('\u00a0', '\u00c1'),)
881 expect = '%C2%A0=%C3%81'
882 result = urllib.parse.urlencode(given)
883 self.assertEqual(expect, result)
884
885 # Latin-1 encoding.
886 given = (('\u00a0', '\u00c1'),)
887 expect = '%A0=%C1'
888 result = urllib.parse.urlencode(given, encoding="latin-1")
889 self.assertEqual(expect, result)
890
891 def test_urlencode_encoding_doseq(self):
892 # ASCII Encoding. Expect %3F with errors="replace'
893 given = (('\u00a0', '\u00c1'),)
894 expect = '%3F=%3F'
895 result = urllib.parse.urlencode(given, doseq=True,
896 encoding="ASCII", errors="replace")
897 self.assertEqual(expect, result)
898
899 # ASCII Encoding. On a sequence of values.
900 given = (("\u00a0", (1, "\u00c1")),)
901 expect = '%3F=1&%3F=%3F'
902 result = urllib.parse.urlencode(given, True,
903 encoding="ASCII", errors="replace")
904 self.assertEqual(expect, result)
905
906 # Utf-8
907 given = (("\u00a0", "\u00c1"),)
908 expect = '%C2%A0=%C3%81'
909 result = urllib.parse.urlencode(given, True)
910 self.assertEqual(expect, result)
911
912 given = (("\u00a0", (42, "\u00c1")),)
913 expect = '%C2%A0=42&%C2%A0=%C3%81'
914 result = urllib.parse.urlencode(given, True)
915 self.assertEqual(expect, result)
916
917 # latin-1
918 given = (("\u00a0", "\u00c1"),)
919 expect = '%A0=%C1'
920 result = urllib.parse.urlencode(given, True, encoding="latin-1")
921 self.assertEqual(expect, result)
922
923 given = (("\u00a0", (42, "\u00c1")),)
924 expect = '%A0=42&%A0=%C1'
925 result = urllib.parse.urlencode(given, True, encoding="latin-1")
926 self.assertEqual(expect, result)
927
928 def test_urlencode_bytes(self):
929 given = ((b'\xa0\x24', b'\xc1\x24'),)
930 expect = '%A0%24=%C1%24'
931 result = urllib.parse.urlencode(given)
932 self.assertEqual(expect, result)
933 result = urllib.parse.urlencode(given, True)
934 self.assertEqual(expect, result)
935
936 # Sequence of values
937 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
938 expect = '%A0%24=42&%A0%24=%C1%24'
939 result = urllib.parse.urlencode(given, True)
940 self.assertEqual(expect, result)
941
942 def test_urlencode_encoding_safe_parameter(self):
943
944 # Send '$' (\x24) as safe character
945 # Default utf-8 encoding
946
947 given = ((b'\xa0\x24', b'\xc1\x24'),)
948 result = urllib.parse.urlencode(given, safe=":$")
949 expect = '%A0$=%C1$'
950 self.assertEqual(expect, result)
951
952 given = ((b'\xa0\x24', b'\xc1\x24'),)
953 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
954 expect = '%A0$=%C1$'
955 self.assertEqual(expect, result)
956
957 # Safe parameter in sequence
958 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
959 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
960 result = urllib.parse.urlencode(given, True, safe=":$")
961 self.assertEqual(expect, result)
962
963 # Test all above in latin-1 encoding
964
965 given = ((b'\xa0\x24', b'\xc1\x24'),)
966 result = urllib.parse.urlencode(given, safe=":$",
967 encoding="latin-1")
968 expect = '%A0$=%C1$'
969 self.assertEqual(expect, result)
970
971 given = ((b'\xa0\x24', b'\xc1\x24'),)
972 expect = '%A0$=%C1$'
973 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
974 encoding="latin-1")
975
976 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
977 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
978 result = urllib.parse.urlencode(given, True, safe=":$",
979 encoding="latin-1")
980 self.assertEqual(expect, result)
981
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.request.url2pathname '
                         'function on Windows.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1041
class Utility_Tests(unittest.TestCase):
    """Test case for the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check that splitpasswd() splits 'user:password' at the first colon.

        Some of the password examples are not sensible, but they are included
        to conform to RFC 2617 and to address issue4675.
        """
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd('user:' + password))
1056
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001057
class URLopener_Tests(unittest.TestCase):
    """Test case for the open method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Echo the URL back so the test can inspect what open() passed on.
            def open_spam(self, url):
                return url

        # A space in the URL is expected to come back percent-encoded
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        safe_url = "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open(safe_url)
        self.assertEqual(opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1073
# The FTP wrapper tests below are just commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1081#
1082# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001083# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001084# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1085# serv.settimeout(3)
1086# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1087# serv.bind(("", 9093))
1088# serv.listen(5)
1089# try:
1090# conn, addr = serv.accept()
1091# conn.send("1 Hola mundo\n")
1092# cantdata = 0
1093# while cantdata < 13:
1094# data = conn.recv(13-cantdata)
1095# cantdata += len(data)
1096# time.sleep(.3)
1097# conn.send("2 No more lines\n")
1098# conn.close()
1099# except socket.timeout:
1100# pass
1101# finally:
1102# serv.close()
1103# evt.set()
1104#
1105# class FTPWrapperTests(unittest.TestCase):
1106#
1107# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001108# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001109# ftplib.FTP.port = 9093
1110# self.evt = threading.Event()
1111# threading.Thread(target=server, args=(self.evt,)).start()
1112# time.sleep(.1)
1113#
1114# def tearDown(self):
1115# self.evt.wait()
1116#
1117# def testBasic(self):
1118# # connects
1119# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001120# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001121#
1122# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001123# # global default timeout is ignored
1124# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001125# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001126# socket.setdefaulttimeout(30)
1127# try:
1128# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1129# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001130# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001131# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001132# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001133#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001134# def testTimeoutDefault(self):
1135# # global default timeout is used
1136# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001137# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001138# socket.setdefaulttimeout(30)
1139# try:
1140# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1141# finally:
1142# socket.setdefaulttimeout(None)
1143# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1144# ftp.close()
1145#
1146# def testTimeoutValue(self):
1147# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1148# timeout=30)
1149# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1150# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001151
Skip Montanaro080c9972001-01-28 21:12:22 +00001152
1153
def test_main():
    """Run every enabled test case class of this module via test.support."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
    )
    support.run_unittest(*test_classes)
Brett Cannon74bfd702003-04-25 09:39:47 +00001168
1169
1170
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()