blob: 3fcf9a3f0c26ae3430e1fa19423eeaafb536f26b [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
def hexescape(char):
    """Return *char* percent-escaped the way RFC 2396 specifies.

    The result is '%' followed by the character's code point written in
    upper-case hexadecimal and zero-padded to at least two digits, e.g.
    ' ' -> '%20' and chr(10) -> '%0A'.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000021
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Open *url* with a FancyURLopener and return the file-like result.

    When *proxies* is given, a fresh opener configured with those proxies is
    used for this call only.  Otherwise a single module-level opener is
    created on first use and shared by all later calls.  *data*, when not
    None, is forwarded to the opener's open() as the second argument.
    """
    global _urlopener
    if proxies is not None:
        # A proxy-specific opener is never cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
38
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        self.pathname = support.TESTFN
        with open(self.pathname, 'wb') as f:
            # Removal is registered as a cleanup instead of being done in
            # tearDown(): cleanups still run when the rest of setUp() raises
            # (tearDown() does not) and when tearDown() itself fails.
            self.addCleanup(os.remove, self.pathname)
            f.write(self.text)
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # Cleanups run after tearDown(), so the object is closed before the
        # backing file is deleted.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        # A file: URL has no HTTP status, so getcode() reports None.
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000117
class ProxyTests(unittest.TestCase):

    def setUp(self):
        # The guard records changes to env vars so tearDown() can undo them.
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  The names are collected first
        # because unsetting modifies os.environ.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment() uses lower-cased keys with the '_proxy'
        # suffix removed, so NO_PROXY is reported under 'no'.
        self.assertEqual('localhost', found['no'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000138
139
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Make http.client.HTTPConnection answer with the bytes *fakedata*.

        http.client.HTTPConnection is replaced by a subclass whose connect()
        installs an in-memory socket preloaded with *fakedata*.  The real
        class is remembered in self._connection_class and is restored
        automatically when the test finishes; unfakehttp() can still be
        called to restore it earlier.
        """
        class FakeSocket(io.BytesIO):
            # Number of outstanding users of the buffer: the socket itself
            # plus one per makefile() call.  The buffer is only really closed
            # when the last of them calls close().
            io_refs = 1

            def sendall(self, data):
                # The outgoing request is discarded.
                pass

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return io.BytesIO.read(self, amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return io.BytesIO.readline(self, length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    io.BytesIO.close(self)

        class FakeHTTPConnection(http.client.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        original = http.client.HTTPConnection
        self._connection_class = original
        http.client.HTTPConnection = FakeHTTPConnection
        # Restore the exact class that was replaced here, even if the test
        # fails or never calls unfakehttp().
        self.addCleanup(setattr, http.client, "HTTPConnection", original)

    def unfakehttp(self):
        """Restore the connection class saved by the last fakehttp() call."""
        http.client.HTTPConnection = self._connection_class

    def check_read(self, ver):
        """Check a 200 response for HTTP version *ver* (bytes, e.g. b"1.1")."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        fp = urlopen("http://python.org/")
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), 'http://python.org/')
        self.assertEqual(fp.getcode(), 200)

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        fp = urllib.request.urlopen(url)
        self.assertEqual(fp.geturl(), url)

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        resp = urlopen("http://www.python.org")
        self.assertTrue(resp.fp.will_close)

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        self.assertRaises(IOError, urlopen, "http://python.org/")

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file:// URL must be rejected with
        # HTTPError instead of being followed.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        self.assertRaises(urllib.error.HTTPError, urlopen,
                          "http://python.org/")

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        self.assertRaises(IOError, urlopen, "http://something")

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        fp = urlopen("http://user:pass@python.org/")
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
        self.assertEqual(fp.getcode(), 200)
255 self.unfakehttp()
256
Brett Cannon19691362003-04-29 05:08:06 +0000257class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000258 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000259
Brett Cannon19691362003-04-29 05:08:06 +0000260 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000261 # Create a list of temporary files. Each item in the list is a file
262 # name (absolute path or relative to the current working directory).
263 # All files in this list will be deleted in the tearDown method. Note,
264 # this only helps to makes sure temporary files get deleted, but it
265 # does nothing about trying to close files that may still be open. It
266 # is the responsibility of the developer to properly close files even
267 # when exceptional conditions occur.
268 self.tempFiles = []
269
Brett Cannon19691362003-04-29 05:08:06 +0000270 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000271 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000272 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000273 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000274 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000275 FILE.write(self.text)
276 FILE.close()
277 finally:
278 try: FILE.close()
279 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000280
281 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000282 # Delete the temporary files.
283 for each in self.tempFiles:
284 try: os.remove(each)
285 except: pass
286
287 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000288 filePath = os.path.abspath(filePath)
289 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000290 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000291 except UnicodeEncodeError:
292 raise unittest.SkipTest("filePath is not encodable to utf8")
293 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000294
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000295 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000296 """Creates a new temporary file containing the specified data,
297 registers the file for deletion during the test fixture tear down, and
298 returns the absolute path of the file."""
299
300 newFd, newFilePath = tempfile.mkstemp()
301 try:
302 self.registerFileForCleanUp(newFilePath)
303 newFile = os.fdopen(newFd, "wb")
304 newFile.write(data)
305 newFile.close()
306 finally:
307 try: newFile.close()
308 except: pass
309 return newFilePath
310
311 def registerFileForCleanUp(self, fileName):
312 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000313
314 def test_basic(self):
315 # Make sure that a local file just gets its own location returned and
316 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000317 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000318 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000319 self.assertIsInstance(result[1], email.message.Message,
320 "did not get a email.message.Message instance "
321 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000322
323 def test_copy(self):
324 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000325 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000326 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000327 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000328 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000329 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000330 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000331 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000332 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000333 try:
334 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000335 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000336 finally:
337 try: FILE.close()
338 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000339 self.assertEqual(self.text, text)
340
341 def test_reporthook(self):
342 # Make sure that the reporthook works.
343 def hooktester(count, block_size, total_size, count_holder=[0]):
Ezio Melottie9615932010-01-24 19:26:24 +0000344 self.assertIsInstance(count, int)
345 self.assertIsInstance(block_size, int)
346 self.assertIsInstance(total_size, int)
Brett Cannon19691362003-04-29 05:08:06 +0000347 self.assertEqual(count, count_holder[0])
348 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000349 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000350 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000351 urllib.request.urlretrieve(
352 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000353 second_temp, hooktester)
354
355 def test_reporthook_0_bytes(self):
356 # Test on zero length file. Should call reporthook only 1 time.
357 report = []
358 def hooktester(count, block_size, total_size, _report=report):
359 _report.append((count, block_size, total_size))
360 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000361 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000362 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000363 self.assertEqual(len(report), 1)
364 self.assertEqual(report[0][2], 0)
365
366 def test_reporthook_5_bytes(self):
367 # Test on 5 byte file. Should call reporthook only 2 times (once when
368 # the "network connection" is established and once when the block is
369 # read). Since the block size is 8192 bytes, only one block read is
370 # required to read the entire file.
371 report = []
372 def hooktester(count, block_size, total_size, _report=report):
373 _report.append((count, block_size, total_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000374 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000375 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000376 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000377 self.assertEqual(len(report), 2)
378 self.assertEqual(report[0][1], 8192)
379 self.assertEqual(report[0][2], 5)
380
381 def test_reporthook_8193_bytes(self):
382 # Test on 8193 byte file. Should call reporthook only 3 times (once
383 # when the "network connection" is established, once for the next 8192
384 # bytes, and once for the last byte).
385 report = []
386 def hooktester(count, block_size, total_size, _report=report):
387 _report.append((count, block_size, total_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000388 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000389 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000390 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000391 self.assertEqual(len(report), 3)
392 self.assertEqual(report[0][1], 8192)
393 self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000394
Brett Cannon74bfd702003-04-25 09:39:47 +0000395class QuotingTests(unittest.TestCase):
396 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000397
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000398 According to RFC 2396 (Uniform Resource Identifiers), to escape a
399 character you write it as '%' + <2 character US-ASCII hex value>.
400 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
401 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000402
403 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000404
Brett Cannon74bfd702003-04-25 09:39:47 +0000405 Reserved characters : ";/?:@&=+$,"
406 Have special meaning in URIs and must be escaped if not being used for
407 their special meaning
408 Data characters : letters, digits, and "-_.!~*'()"
409 Unreserved and do not need to be escaped; can be, though, if desired
410 Control characters : 0x00 - 0x1F, 0x7F
411 Have no use in URIs so must be escaped
412 space : 0x20
413 Must be escaped
414 Delimiters : '<>#%"'
415 Must be escaped
416 Unwise : "{}|\^[]`"
417 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000418
Brett Cannon74bfd702003-04-25 09:39:47 +0000419 """
420
421 def test_never_quote(self):
422 # Make sure quote() does not quote letters, digits, and "_,.-"
423 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
424 "abcdefghijklmnopqrstuvwxyz",
425 "0123456789",
426 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000427 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000428 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000429 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000430 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000431 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000432 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000433
434 def test_default_safe(self):
435 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000436 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000437
438 def test_safe(self):
439 # Test setting 'safe' parameter does what it should do
440 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000441 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000442 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000443 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000444 result = urllib.parse.quote_plus(quote_by_default,
445 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000446 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000447 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000448 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000449 # Safe expressed as bytes rather than str
450 result = urllib.parse.quote(quote_by_default, safe=b"<>")
451 self.assertEqual(quote_by_default, result,
452 "using quote(): %r != %r" % (quote_by_default, result))
453 # "Safe" non-ASCII characters should have no effect
454 # (Since URIs are not allowed to have non-ASCII characters)
455 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
456 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
457 self.assertEqual(expect, result,
458 "using quote(): %r != %r" %
459 (expect, result))
460 # Same as above, but using a bytes rather than str
461 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
462 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
463 self.assertEqual(expect, result,
464 "using quote(): %r != %r" %
465 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000466
467 def test_default_quoting(self):
468 # Make sure all characters that should be quoted are by default sans
469 # space (separate test for that).
470 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
471 should_quote.append('<>#%"{}|\^[]`')
472 should_quote.append(chr(127)) # For 0x7F
473 should_quote = ''.join(should_quote)
474 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000475 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000476 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000477 "using quote(): "
478 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000479 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000480 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000481 self.assertEqual(hexescape(char), result,
482 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000483 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000484 (char, hexescape(char), result))
485 del should_quote
486 partial_quote = "ab[]cd"
487 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000488 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000489 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000490 "using quote(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000491 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000492 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000493
494 def test_quoting_space(self):
495 # Make sure quote() and quote_plus() handle spaces as specified in
496 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000497 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000498 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000499 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000500 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000501 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000502 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000503 given = "a b cd e f"
504 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000505 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000506 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000507 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000508 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000509 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000510 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000511 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000512
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000513 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000514 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000515 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000516 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000517 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000518 # Test with bytes
519 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
520 'alpha%2Bbeta+gamma')
521 # Test with safe bytes
522 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
523 'alpha+beta+gamma')
524
525 def test_quote_bytes(self):
526 # Bytes should quote directly to percent-encoded values
527 given = b"\xa2\xd8ab\xff"
528 expect = "%A2%D8ab%FF"
529 result = urllib.parse.quote(given)
530 self.assertEqual(expect, result,
531 "using quote(): %r != %r" % (expect, result))
532 # Encoding argument should raise type error on bytes input
533 self.assertRaises(TypeError, urllib.parse.quote, given,
534 encoding="latin-1")
535 # quote_from_bytes should work the same
536 result = urllib.parse.quote_from_bytes(given)
537 self.assertEqual(expect, result,
538 "using quote_from_bytes(): %r != %r"
539 % (expect, result))
540
541 def test_quote_with_unicode(self):
542 # Characters in Latin-1 range, encoded by default in UTF-8
543 given = "\xa2\xd8ab\xff"
544 expect = "%C2%A2%C3%98ab%C3%BF"
545 result = urllib.parse.quote(given)
546 self.assertEqual(expect, result,
547 "using quote(): %r != %r" % (expect, result))
548 # Characters in Latin-1 range, encoded by with None (default)
549 result = urllib.parse.quote(given, encoding=None, errors=None)
550 self.assertEqual(expect, result,
551 "using quote(): %r != %r" % (expect, result))
552 # Characters in Latin-1 range, encoded with Latin-1
553 given = "\xa2\xd8ab\xff"
554 expect = "%A2%D8ab%FF"
555 result = urllib.parse.quote(given, encoding="latin-1")
556 self.assertEqual(expect, result,
557 "using quote(): %r != %r" % (expect, result))
558 # Characters in BMP, encoded by default in UTF-8
559 given = "\u6f22\u5b57" # "Kanji"
560 expect = "%E6%BC%A2%E5%AD%97"
561 result = urllib.parse.quote(given)
562 self.assertEqual(expect, result,
563 "using quote(): %r != %r" % (expect, result))
564 # Characters in BMP, encoded with Latin-1
565 given = "\u6f22\u5b57"
566 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
567 encoding="latin-1")
568 # Characters in BMP, encoded with Latin-1, with replace error handling
569 given = "\u6f22\u5b57"
570 expect = "%3F%3F" # "??"
571 result = urllib.parse.quote(given, encoding="latin-1",
572 errors="replace")
573 self.assertEqual(expect, result,
574 "using quote(): %r != %r" % (expect, result))
575 # Characters in BMP, Latin-1, with xmlcharref error handling
576 given = "\u6f22\u5b57"
577 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
578 result = urllib.parse.quote(given, encoding="latin-1",
579 errors="xmlcharrefreplace")
580 self.assertEqual(expect, result,
581 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000582
Georg Brandlfaf41492009-05-26 18:31:11 +0000583 def test_quote_plus_with_unicode(self):
584 # Encoding (latin-1) test for quote_plus
585 given = "\xa2\xd8 \xff"
586 expect = "%A2%D8+%FF"
587 result = urllib.parse.quote_plus(given, encoding="latin-1")
588 self.assertEqual(expect, result,
589 "using quote_plus(): %r != %r" % (expect, result))
590 # Errors test for quote_plus
591 given = "ab\u6f22\u5b57 cd"
592 expect = "ab%3F%3F+cd"
593 result = urllib.parse.quote_plus(given, encoding="latin-1",
594 errors="replace")
595 self.assertEqual(expect, result,
596 "using quote_plus(): %r != %r" % (expect, result))
597
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000598
Brett Cannon74bfd702003-04-25 09:39:47 +0000599class UnquotingTests(unittest.TestCase):
600 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000601
Brett Cannon74bfd702003-04-25 09:39:47 +0000602 See the doc string for quoting_Tests for details on quoting and such.
603
604 """
605
606 def test_unquoting(self):
607 # Make sure unquoting of all ASCII values works
608 escape_list = []
609 for num in range(128):
610 given = hexescape(chr(num))
611 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000612 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000613 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000614 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000615 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000616 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000617 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000618 (expect, result))
619 escape_list.append(given)
620 escape_string = ''.join(escape_list)
621 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000622 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000623 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000624 "using unquote(): not all characters escaped: "
625 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000626 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
627 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000628 with support.check_warnings(('', BytesWarning), quiet=True):
629 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000630
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000631 def test_unquoting_badpercent(self):
632 # Test unquoting on bad percent-escapes
633 given = '%xab'
634 expect = given
635 result = urllib.parse.unquote(given)
636 self.assertEqual(expect, result, "using unquote(): %r != %r"
637 % (expect, result))
638 given = '%x'
639 expect = given
640 result = urllib.parse.unquote(given)
641 self.assertEqual(expect, result, "using unquote(): %r != %r"
642 % (expect, result))
643 given = '%'
644 expect = given
645 result = urllib.parse.unquote(given)
646 self.assertEqual(expect, result, "using unquote(): %r != %r"
647 % (expect, result))
648 # unquote_to_bytes
649 given = '%xab'
650 expect = bytes(given, 'ascii')
651 result = urllib.parse.unquote_to_bytes(given)
652 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
653 % (expect, result))
654 given = '%x'
655 expect = bytes(given, 'ascii')
656 result = urllib.parse.unquote_to_bytes(given)
657 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
658 % (expect, result))
659 given = '%'
660 expect = bytes(given, 'ascii')
661 result = urllib.parse.unquote_to_bytes(given)
662 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
663 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000664 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
665 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000666
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000667 def test_unquoting_mixed_case(self):
668 # Test unquoting on mixed-case hex digits in the percent-escapes
669 given = '%Ab%eA'
670 expect = b'\xab\xea'
671 result = urllib.parse.unquote_to_bytes(given)
672 self.assertEqual(expect, result,
673 "using unquote_to_bytes(): %r != %r"
674 % (expect, result))
675
Brett Cannon74bfd702003-04-25 09:39:47 +0000676 def test_unquoting_parts(self):
677 # Make sure unquoting works when have non-quoted characters
678 # interspersed
679 given = 'ab%sd' % hexescape('c')
680 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000681 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000682 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000683 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000684 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000685 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000686 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000687
Brett Cannon74bfd702003-04-25 09:39:47 +0000688 def test_unquoting_plus(self):
689 # Test difference between unquote() and unquote_plus()
690 given = "are+there+spaces..."
691 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000692 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000693 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000694 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000695 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000696 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000697 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000698 "using unquote_plus(): %r != %r" % (expect, result))
699
700 def test_unquote_to_bytes(self):
701 given = 'br%C3%BCckner_sapporo_20050930.doc'
702 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
703 result = urllib.parse.unquote_to_bytes(given)
704 self.assertEqual(expect, result,
705 "using unquote_to_bytes(): %r != %r"
706 % (expect, result))
707 # Test on a string with unescaped non-ASCII characters
708 # (Technically an invalid URI; expect those characters to be UTF-8
709 # encoded).
710 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
711 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
712 self.assertEqual(expect, result,
713 "using unquote_to_bytes(): %r != %r"
714 % (expect, result))
715 # Test with a bytes as input
716 given = b'%A2%D8ab%FF'
717 expect = b'\xa2\xd8ab\xff'
718 result = urllib.parse.unquote_to_bytes(given)
719 self.assertEqual(expect, result,
720 "using unquote_to_bytes(): %r != %r"
721 % (expect, result))
722 # Test with a bytes as input, with unescaped non-ASCII bytes
723 # (Technically an invalid URI; expect those bytes to be preserved)
724 given = b'%A2\xd8ab%FF'
725 expect = b'\xa2\xd8ab\xff'
726 result = urllib.parse.unquote_to_bytes(given)
727 self.assertEqual(expect, result,
728 "using unquote_to_bytes(): %r != %r"
729 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000730
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000731 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000732 # Characters in the Latin-1 range, encoded with UTF-8
733 given = 'br%C3%BCckner_sapporo_20050930.doc'
734 expect = 'br\u00fcckner_sapporo_20050930.doc'
735 result = urllib.parse.unquote(given)
736 self.assertEqual(expect, result,
737 "using unquote(): %r != %r" % (expect, result))
738 # Characters in the Latin-1 range, encoded with None (default)
739 result = urllib.parse.unquote(given, encoding=None, errors=None)
740 self.assertEqual(expect, result,
741 "using unquote(): %r != %r" % (expect, result))
742
743 # Characters in the Latin-1 range, encoded with Latin-1
744 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
745 encoding="latin-1")
746 expect = 'br\u00fcckner_sapporo_20050930.doc'
747 self.assertEqual(expect, result,
748 "using unquote(): %r != %r" % (expect, result))
749
750 # Characters in BMP, encoded with UTF-8
751 given = "%E6%BC%A2%E5%AD%97"
752 expect = "\u6f22\u5b57" # "Kanji"
753 result = urllib.parse.unquote(given)
754 self.assertEqual(expect, result,
755 "using unquote(): %r != %r" % (expect, result))
756
757 # Decode with UTF-8, invalid sequence
758 given = "%F3%B1"
759 expect = "\ufffd" # Replacement character
760 result = urllib.parse.unquote(given)
761 self.assertEqual(expect, result,
762 "using unquote(): %r != %r" % (expect, result))
763
764 # Decode with UTF-8, invalid sequence, replace errors
765 result = urllib.parse.unquote(given, errors="replace")
766 self.assertEqual(expect, result,
767 "using unquote(): %r != %r" % (expect, result))
768
769 # Decode with UTF-8, invalid sequence, ignoring errors
770 given = "%F3%B1"
771 expect = ""
772 result = urllib.parse.unquote(given, errors="ignore")
773 self.assertEqual(expect, result,
774 "using unquote(): %r != %r" % (expect, result))
775
776 # A mix of non-ASCII and percent-encoded characters, UTF-8
777 result = urllib.parse.unquote("\u6f22%C3%BC")
778 expect = '\u6f22\u00fc'
779 self.assertEqual(expect, result,
780 "using unquote(): %r != %r" % (expect, result))
781
782 # A mix of non-ASCII and percent-encoded characters, Latin-1
783 # (Note, the string contains non-Latin-1-representable characters)
784 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
785 expect = '\u6f22\u00fc'
786 self.assertEqual(expect, result,
787 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000788
Brett Cannon74bfd702003-04-25 09:39:47 +0000789class urlencode_Tests(unittest.TestCase):
790 """Tests for urlencode()"""
791
792 def help_inputtype(self, given, test_type):
793 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000794
Brett Cannon74bfd702003-04-25 09:39:47 +0000795 'given' must lead to only the pairs:
796 * 1st, 1
797 * 2nd, 2
798 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000799
Brett Cannon74bfd702003-04-25 09:39:47 +0000800 Test cannot assume anything about order. Docs make no guarantee and
801 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000802
Brett Cannon74bfd702003-04-25 09:39:47 +0000803 """
804 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000805 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000806 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000807 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +0000808 "testing %s: %s not found in %s" %
809 (test_type, expected, result))
810 self.assertEqual(result.count('&'), 2,
811 "testing %s: expected 2 '&'s; got %s" %
812 (test_type, result.count('&')))
813 amp_location = result.index('&')
814 on_amp_left = result[amp_location - 1]
815 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000816 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +0000817 "testing %s: '&' not located in proper place in %s" %
818 (test_type, result))
819 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
820 "testing %s: "
821 "unexpected number of characters: %s != %s" %
822 (test_type, len(result), (5 * 3) + 2))
823
824 def test_using_mapping(self):
825 # Test passing in a mapping object as an argument.
826 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
827 "using dict as input type")
828
829 def test_using_sequence(self):
830 # Test passing in a sequence of two-item sequences as an argument.
831 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
832 "using sequence of two-item tuples as input")
833
834 def test_quoting(self):
835 # Make sure keys and values are quoted using quote_plus()
836 given = {"&":"="}
837 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000838 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000839 self.assertEqual(expect, result)
840 given = {"key name":"A bunch of pluses"}
841 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000842 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000843 self.assertEqual(expect, result)
844
845 def test_doseq(self):
846 # Test that passing True for 'doseq' parameter works correctly
847 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000848 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
849 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000850 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000851 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +0000852 for value in given["sequence"]:
853 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +0000854 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000855 self.assertEqual(result.count('&'), 2,
856 "Expected 2 '&'s, got %s" % result.count('&'))
857
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000858 def test_empty_sequence(self):
859 self.assertEqual("", urllib.parse.urlencode({}))
860 self.assertEqual("", urllib.parse.urlencode([]))
861
862 def test_nonstring_values(self):
863 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
864 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
865
866 def test_nonstring_seq_values(self):
867 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
868 self.assertEqual("a=None&a=a",
869 urllib.parse.urlencode({"a": [None, "a"]}, True))
870 self.assertEqual("a=a&a=b",
871 urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
872
Senthil Kumarandf022da2010-07-03 17:48:22 +0000873 def test_urlencode_encoding(self):
874 # ASCII encoding. Expect %3F with errors="replace'
875 given = (('\u00a0', '\u00c1'),)
876 expect = '%3F=%3F'
877 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
878 self.assertEqual(expect, result)
879
880 # Default is UTF-8 encoding.
881 given = (('\u00a0', '\u00c1'),)
882 expect = '%C2%A0=%C3%81'
883 result = urllib.parse.urlencode(given)
884 self.assertEqual(expect, result)
885
886 # Latin-1 encoding.
887 given = (('\u00a0', '\u00c1'),)
888 expect = '%A0=%C1'
889 result = urllib.parse.urlencode(given, encoding="latin-1")
890 self.assertEqual(expect, result)
891
892 def test_urlencode_encoding_doseq(self):
893 # ASCII Encoding. Expect %3F with errors="replace'
894 given = (('\u00a0', '\u00c1'),)
895 expect = '%3F=%3F'
896 result = urllib.parse.urlencode(given, doseq=True,
897 encoding="ASCII", errors="replace")
898 self.assertEqual(expect, result)
899
900 # ASCII Encoding. On a sequence of values.
901 given = (("\u00a0", (1, "\u00c1")),)
902 expect = '%3F=1&%3F=%3F'
903 result = urllib.parse.urlencode(given, True,
904 encoding="ASCII", errors="replace")
905 self.assertEqual(expect, result)
906
907 # Utf-8
908 given = (("\u00a0", "\u00c1"),)
909 expect = '%C2%A0=%C3%81'
910 result = urllib.parse.urlencode(given, True)
911 self.assertEqual(expect, result)
912
913 given = (("\u00a0", (42, "\u00c1")),)
914 expect = '%C2%A0=42&%C2%A0=%C3%81'
915 result = urllib.parse.urlencode(given, True)
916 self.assertEqual(expect, result)
917
918 # latin-1
919 given = (("\u00a0", "\u00c1"),)
920 expect = '%A0=%C1'
921 result = urllib.parse.urlencode(given, True, encoding="latin-1")
922 self.assertEqual(expect, result)
923
924 given = (("\u00a0", (42, "\u00c1")),)
925 expect = '%A0=42&%A0=%C1'
926 result = urllib.parse.urlencode(given, True, encoding="latin-1")
927 self.assertEqual(expect, result)
928
929 def test_urlencode_bytes(self):
930 given = ((b'\xa0\x24', b'\xc1\x24'),)
931 expect = '%A0%24=%C1%24'
932 result = urllib.parse.urlencode(given)
933 self.assertEqual(expect, result)
934 result = urllib.parse.urlencode(given, True)
935 self.assertEqual(expect, result)
936
937 # Sequence of values
938 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
939 expect = '%A0%24=42&%A0%24=%C1%24'
940 result = urllib.parse.urlencode(given, True)
941 self.assertEqual(expect, result)
942
943 def test_urlencode_encoding_safe_parameter(self):
944
945 # Send '$' (\x24) as safe character
946 # Default utf-8 encoding
947
948 given = ((b'\xa0\x24', b'\xc1\x24'),)
949 result = urllib.parse.urlencode(given, safe=":$")
950 expect = '%A0$=%C1$'
951 self.assertEqual(expect, result)
952
953 given = ((b'\xa0\x24', b'\xc1\x24'),)
954 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
955 expect = '%A0$=%C1$'
956 self.assertEqual(expect, result)
957
958 # Safe parameter in sequence
959 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
960 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
961 result = urllib.parse.urlencode(given, True, safe=":$")
962 self.assertEqual(expect, result)
963
964 # Test all above in latin-1 encoding
965
966 given = ((b'\xa0\x24', b'\xc1\x24'),)
967 result = urllib.parse.urlencode(given, safe=":$",
968 encoding="latin-1")
969 expect = '%A0$=%C1$'
970 self.assertEqual(expect, result)
971
972 given = ((b'\xa0\x24', b'\xc1\x24'),)
973 expect = '%A0$=%C1$'
974 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
975 encoding="latin-1")
976
977 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
978 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
979 result = urllib.parse.urlencode(given, True, safe=":$",
980 encoding="latin-1")
981 self.assertEqual(expect, result)
982
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behavior of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Several spellings of a drive root must all map to 'C:\'
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1042
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() with plain, whitespace and ':' passwords.

        Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        passwords = ('ab', 'a\nb', 'a\tb', 'a\rb', 'a\fb', 'a\vb', 'a:b')
        for password in passwords:
            self.assertEqual(('user', password),
                             urllib.parse.splitpasswd('user:' + password))
1057
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001058
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # An opener whose handler for the 'spam' scheme simply hands back
        # the URL it was given, so the test can inspect it.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        # A space in the URL is expected to reach the handler as %20
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        opened = DummyURLopener().open(
            "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
        self.assertEqual(opened,
                         "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1074
# The tests below are just commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests go OK there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1082#
1083# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001084# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001085# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1086# serv.settimeout(3)
1087# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1088# serv.bind(("", 9093))
1089# serv.listen(5)
1090# try:
1091# conn, addr = serv.accept()
1092# conn.send("1 Hola mundo\n")
1093# cantdata = 0
1094# while cantdata < 13:
1095# data = conn.recv(13-cantdata)
1096# cantdata += len(data)
1097# time.sleep(.3)
1098# conn.send("2 No more lines\n")
1099# conn.close()
1100# except socket.timeout:
1101# pass
1102# finally:
1103# serv.close()
1104# evt.set()
1105#
1106# class FTPWrapperTests(unittest.TestCase):
1107#
1108# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001109# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001110# ftplib.FTP.port = 9093
1111# self.evt = threading.Event()
1112# threading.Thread(target=server, args=(self.evt,)).start()
1113# time.sleep(.1)
1114#
1115# def tearDown(self):
1116# self.evt.wait()
1117#
1118# def testBasic(self):
1119# # connects
1120# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001121# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001122#
1123# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001124# # global default timeout is ignored
1125# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001126# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001127# socket.setdefaulttimeout(30)
1128# try:
1129# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1130# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001131# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001132# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001133# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001134#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001135# def testTimeoutDefault(self):
1136# # global default timeout is used
1137# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001138# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001139# socket.setdefaulttimeout(30)
1140# try:
1141# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1142# finally:
1143# socket.setdefaulttimeout(None)
1144# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1145# ftp.close()
1146#
1147# def testTimeoutValue(self):
1148# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1149# timeout=30)
1150# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1151# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001152
Skip Montanaro080c9972001-01-28 21:12:22 +00001153
1154
def test_main():
    """Run every enabled test case class of this module."""
    enabled_tests = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        # FTPWrapperTests is left out: the class is commented out in this
        # file.
        #FTPWrapperTests,
    )
    support.run_unittest(*enabled_tests)
Brett Cannon74bfd702003-04-25 09:39:47 +00001169
1170
1171
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()